Fix: #14523 Add alt-text codeblock to Images for workflow_templates.rst (#14604)

* add alt to images in workflow_templates.rst

Signed-off-by: Ratan Gulati <ratangulati.dev@gmail.com>

* add alt to images in workflow_templates.rst

Signed-off-by: Ratan Gulati <ratangulati.dev@gmail.com>

* Update workflow_templates.rst

* Revised proposed alt text for workflow_templates.rst

---------

Signed-off-by: Ratan Gulati <ratangulati.dev@gmail.com>
Co-authored-by: TVo <thavo@redhat.com>
Removed references to images that were deleted.
2026-02-04 19:18:13 -03:30 · 2023-11-07 10:51:11 -07:00 · 2023-11-07 08:55:27 -07:00 · 2023-11-07 08:55:27 -07:00 · 2023-11-07 08:14:34 -07:00 · 2023-11-07 08:14:34 -07:00
1096 changed files with 25572 additions and 3134 deletions
--- a/.github/actions/awx_devel_image/action.yml
+++ b/.github/actions/awx_devel_image/action.yml
@@ -0,0 +1,28 @@
+name: Setup images for AWX
+description: Builds new awx_devel image
+inputs:
+  github-token:
+    description: GitHub Token for registry access
+    required: true
+runs:
+  using: composite
+  steps:
+    - name: Get python version from Makefile
+      shell: bash
+      run: echo py_version=`make PYTHON_VERSION` >> $GITHUB_ENV
+
+    - name: Log in to registry
+      shell: bash
+      run: |
+        echo "${{ inputs.github-token }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
+
+    - name: Pre-pull latest devel image to warm cache
+      shell: bash
+      run: docker pull ghcr.io/${{ github.repository_owner }}/awx_devel:${{ github.base_ref }} || true  # best-effort cache warm-up; a missing image must not fail the build
+
+    - name: Build image for current source checkout
+      shell: bash
+      run: |
+        DEV_DOCKER_TAG_BASE=ghcr.io/${{ github.repository_owner }} \
+        COMPOSE_TAG=${{ github.base_ref }} \
+        make docker-compose-build
--- a/.github/actions/run_awx_devel/action.yml
+++ b/.github/actions/run_awx_devel/action.yml
@@ -0,0 +1,73 @@
+name: Run AWX docker-compose
+description: Runs AWX with `make docker-compose`
+inputs:
+  github-token:
+    description: GitHub Token to pass to awx_devel_image
+    required: true
+  build-ui:
+    description: Should the UI be built?
+    required: false
+    default: false
+    type: boolean
+outputs:
+  ip:
+    description: The IP of the tools_awx_1 container
+    value: ${{ steps.data.outputs.ip }}
+  admin-token:
+    description: OAuth token for admin user
+    value: ${{ steps.data.outputs.admin_token }}
+runs:
+  using: composite
+  steps:
+    - name: Build awx_devel image for running checks
+      uses: ./.github/actions/awx_devel_image
+      with:
+        github-token: ${{ inputs.github-token }}
+
+    - name: Upgrade ansible-core
+      shell: bash
+      run: python3 -m pip install --upgrade ansible-core
+
+    - name: Install system deps
+      shell: bash
+      run: sudo apt-get install -y gettext
+
+    - name: Start AWX
+      shell: bash
+      run: |
+        DEV_DOCKER_TAG_BASE=ghcr.io/${{ github.repository_owner }} \
+        COMPOSE_TAG=${{ github.base_ref }} \
+        COMPOSE_UP_OPTS="-d" \
+        make docker-compose
+
+    - name: Update default AWX password
+      shell: bash
+      run: |
+        while [[ "$(curl -s -o /dev/null -w ''%{http_code}'' -k https://localhost:8043/api/v2/ping/)" != "200" ]]
+        do
+        echo "Waiting for AWX..."
+        sleep 5
+        done
+        echo "AWX is up, updating the password..."
+        docker exec -i tools_awx_1 sh <<-EOSH
+          awx-manage update_password --username=admin --password=password
+        EOSH
+
+    - name: Build UI
+      # This must be a string comparison in composite actions:
+      # https://github.com/actions/runner/issues/2238
+      if: ${{ inputs.build-ui == 'true' }}
+      shell: bash
+      run: |
+        docker exec -i tools_awx_1 sh <<-EOSH
+          make ui-devel
+        EOSH
+
+    - name: Get instance data
+      id: data
+      shell: bash
+      run: |
+        AWX_IP=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' tools_awx_1)
+        ADMIN_TOKEN=$(docker exec -i tools_awx_1 awx-manage create_oauth2_token --user admin)
+        echo "ip=$AWX_IP" >> $GITHUB_OUTPUT
+        echo "admin_token=$ADMIN_TOKEN" >> $GITHUB_OUTPUT
--- a/.github/actions/upload_awx_devel_logs/action.yml
+++ b/.github/actions/upload_awx_devel_logs/action.yml
@@ -0,0 +1,19 @@
+name: Upload logs
+description: Upload logs from `make docker-compose` devel environment to GitHub as an artifact
+inputs:
+  log-filename:
+    description: "*Unique* name of the log file"
+    required: true
+runs:
+  using: composite
+  steps:
+    - name: Get AWX logs
+      shell: bash
+      run: |
+        docker logs tools_awx_1 > ${{ inputs.log-filename }}
+
+    - name: Upload AWX logs as artifact
+      uses: actions/upload-artifact@v3
+      with:
+        name: docker-compose-logs
+        path: ${{ inputs.log-filename }}
--- a/.github/pr_labeler.yml
+++ b/.github/pr_labeler.yml
@@ -15,5 +15,5 @@

 "dependencies":
  - any: ["awx/ui/package.json"]
-  - any: ["awx/requirements/*.txt"]
-  - any: ["awx/requirements/requirements.in"]
+  - any: ["requirements/*.txt"]
+  - any: ["requirements/requirements.in"]
--- a/.github/triage_replies.md
+++ b/.github/triage_replies.md
@@ -7,8 +7,8 @@

 ## PRs/Issues

-### Visit our mailing list
- Hello, this appears to be less of a bug report or feature request and more of a question. Could you please ask this on our mailing list? See https://github.com/ansible/awx/#get-involved for information for ways to connect with us.
+### Visit the Forum or Matrix
+- Hello, this appears to be less of a bug report or feature request and more of a question. Could you please ask this on either the [Ansible AWX channel on Matrix](https://matrix.to/#/#awx:ansible.com) or the [Ansible Community Forum](https://forum.ansible.com/tag/awx)?

 ### Denied Submission

--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -35,29 +35,40 @@ jobs:
          - name: ui-test-general
            command: make ui-test-general
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
+
+      - name: Build awx_devel image for running checks
+        uses: ./.github/actions/awx_devel_image
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Run check ${{ matrix.tests.name }}
-        run: AWX_DOCKER_CMD='${{ matrix.tests.command }}' make github_ci_runner
+        run: AWX_DOCKER_CMD='${{ matrix.tests.command }}' make docker-runner

  dev-env:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
+
+      - uses: ./.github/actions/run_awx_devel
+        id: awx
+        with:
+          build-ui: false
+          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Run smoke test
-        run: make github_ci_setup && ansible-playbook tools/docker-compose/ansible/smoke-test.yml -v
+        run: ansible-playbook tools/docker-compose/ansible/smoke-test.yml -v

  awx-operator:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout awx
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
        with:
          path: awx

      - name: Checkout awx-operator
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
        with:
          repository: ansible/awx-operator
          path: awx-operator
@@ -67,7 +78,7 @@ jobs:
        run: echo py_version=`make PYTHON_VERSION` >> $GITHUB_ENV

      - name: Install python ${{ env.py_version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.py_version }}

@@ -102,7 +113,7 @@ jobs:
    strategy:
      fail-fast: false
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3

      # The containers that GitHub Actions use have Ansible installed, so upgrade to make sure we have the latest version.
      - name: Upgrade ansible-core
@@ -114,3 +125,137 @@ jobs:
          # needed due to cgroupsv2. This is fixed, but a stable release
          # with the fix has not been made yet.
          ANSIBLE_TEST_PREFER_PODMAN: 1
+
+  collection-integration:
+    name: awx_collection integration
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        target-regex:  # shards integration targets by first character; ranges must jointly cover a-z and 0-9
+          - name: a-h
+            regex: ^[a-h]
+          - name: i-p
+            regex: ^[i-p]
+          - name: q-z0-9
+            regex: ^[q-z0-9]  # starts at q so no target falls between this shard and i-p
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: ./.github/actions/run_awx_devel
+        id: awx
+        with:
+          build-ui: false
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Install dependencies for running tests
+        run: |
+          python3 -m pip install -e ./awxkit/
+          python3 -m pip install -r awx_collection/requirements.txt
+
+      - name: Run integration tests
+        run: |
+          echo "::remove-matcher owner=python::"  # Disable annoying annotations from setup-python
+          echo '[general]' > ~/.tower_cli.cfg
+          echo 'host = https://${{ steps.awx.outputs.ip }}:8043' >> ~/.tower_cli.cfg
+          echo 'oauth_token = ${{ steps.awx.outputs.admin-token }}' >> ~/.tower_cli.cfg
+          echo 'verify_ssl = false' >> ~/.tower_cli.cfg
+          TARGETS="$(ls awx_collection/tests/integration/targets | grep '${{ matrix.target-regex.regex }}' | tr '\n' ' ')"
+          make COLLECTION_VERSION=100.100.100-git COLLECTION_TEST_TARGET="--coverage --requirements $TARGETS" test_collection_integration
+        env:
+          ANSIBLE_TEST_PREFER_PODMAN: 1
+
+      # Upload coverage report as artifact
+      - uses: actions/upload-artifact@v3
+        if: always()
+        with:
+          name: coverage-${{ matrix.target-regex.name }}
+          path: ~/.ansible/collections/ansible_collections/awx/awx/tests/output/coverage/
+
+      - uses: ./.github/actions/upload_awx_devel_logs
+        if: always()
+        with:
+          log-filename: collection-integration-${{ matrix.target-regex.name }}.log
+
+  collection-integration-coverage-combine:
+    name: combine awx_collection integration coverage
+    runs-on: ubuntu-latest
+    needs:
+      - collection-integration
+    strategy:
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Upgrade ansible-core
+        run: python3 -m pip install --upgrade ansible-core
+
+      - name: Download coverage artifacts
+        uses: actions/download-artifact@v3
+        with:
+          path: coverage
+
+      - name: Combine coverage
+        run: |
+          make COLLECTION_VERSION=100.100.100-git install_collection
+          mkdir -p ~/.ansible/collections/ansible_collections/awx/awx/tests/output/coverage
+          cd coverage
+          for i in coverage-*; do
+            cp -rv $i/* ~/.ansible/collections/ansible_collections/awx/awx/tests/output/coverage/
+          done
+          cd ~/.ansible/collections/ansible_collections/awx/awx
+          ansible-test coverage combine --requirements
+          ansible-test coverage html
+          echo '## AWX Collection Integration Coverage' >> $GITHUB_STEP_SUMMARY
+          echo '```' >> $GITHUB_STEP_SUMMARY
+          ansible-test coverage report >> $GITHUB_STEP_SUMMARY
+          echo '```' >> $GITHUB_STEP_SUMMARY
+          echo >> $GITHUB_STEP_SUMMARY
+          echo '## AWX Collection Integration Coverage HTML' >> $GITHUB_STEP_SUMMARY
+          echo 'Download the HTML artifacts to view the coverage report.' >> $GITHUB_STEP_SUMMARY
+
+      # This is a huge hack, there's no official action for removing artifacts currently.
+      # Also ACTIONS_RUNTIME_URL and ACTIONS_RUNTIME_TOKEN aren't available in normal run
+      # steps, so we have to use github-script to get them.
+      #
+      # The advantage of doing this, though, is that we save on artifact storage space.
+
+      - name: Get secret artifact runtime URL
+        uses: actions/github-script@v6
+        id: get-runtime-url
+        with:
+          result-encoding: string
+          script: |
+            const { ACTIONS_RUNTIME_URL } = process.env;
+            return ACTIONS_RUNTIME_URL;
+
+      - name: Get secret artifact runtime token
+        uses: actions/github-script@v6
+        id: get-runtime-token
+        with:
+          result-encoding: string
+          script: |
+            const { ACTIONS_RUNTIME_TOKEN } = process.env;
+            return ACTIONS_RUNTIME_TOKEN;
+
+      - name: Remove intermediary artifacts
+        env:
+          ACTIONS_RUNTIME_URL: ${{ steps.get-runtime-url.outputs.result }}
+          ACTIONS_RUNTIME_TOKEN: ${{ steps.get-runtime-token.outputs.result }}
+        run: |
+          echo "::add-mask::${ACTIONS_RUNTIME_TOKEN}"
+          artifacts=$(
+            curl -H "Authorization: Bearer $ACTIONS_RUNTIME_TOKEN" \
+              ${ACTIONS_RUNTIME_URL}_apis/pipelines/workflows/${{ github.run_id }}/artifacts?api-version=6.0-preview \
+            | jq -r '.value | .[] | select(.name | startswith("coverage-")) | .url'
+          )
+
+          for artifact in $artifacts; do
+            curl -i -X DELETE -H "Accept: application/json;api-version=6.0-preview" -H "Authorization: Bearer $ACTIONS_RUNTIME_TOKEN" "$artifact"
+          done
+
+      - name: Upload coverage report as artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: awx-collection-integration-coverage-html
+          path: ~/.ansible/collections/ansible_collections/awx/awx/tests/output/reports/coverage
--- a/.github/workflows/devel_images.yml
+++ b/.github/workflows/devel_images.yml
@@ -16,7 +16,7 @@ jobs:
      packages: write
      contents: read
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3

      - name: Get python version from Makefile
        run: echo py_version=`make PYTHON_VERSION` >> $GITHUB_ENV
@@ -28,7 +28,7 @@ jobs:
          OWNER: '${{ github.repository_owner }}'

      - name: Install python ${{ env.py_version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.py_version }}

@@ -48,8 +48,11 @@ jobs:
          DEV_DOCKER_TAG_BASE=ghcr.io/${OWNER_LC} COMPOSE_TAG=${GITHUB_REF##*/} make awx-kube-dev-build
          DEV_DOCKER_TAG_BASE=ghcr.io/${OWNER_LC} COMPOSE_TAG=${GITHUB_REF##*/} make awx-kube-build

-      - name: Push image
+      - name: Push development images
        run: |
          docker push ghcr.io/${OWNER_LC}/awx_devel:${GITHUB_REF##*/}
          docker push ghcr.io/${OWNER_LC}/awx_kube_devel:${GITHUB_REF##*/}
-          docker push ghcr.io/${OWNER_LC}/awx:${GITHUB_REF##*/}
+
+      - name: Push AWX k8s image, only for upstream and feature branches
+        run: docker push ghcr.io/${OWNER_LC}/awx:${GITHUB_REF##*/}
+        if: endsWith(github.repository, '/awx')
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,16 @@
+---
+name: Docsite CI
+on:
+  pull_request:
+jobs:
+  docsite-build:
+    name: docsite test build
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: install tox
+        run: pip install tox
+
+      - name: Assure docs can be built
+        run: tox -e docs
--- a/.github/workflows/e2e_test.yml
+++ b/.github/workflows/e2e_test.yml
@@ -19,41 +19,20 @@ jobs:
        job: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]

    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3

-      - name: Get python version from Makefile
-        run: echo py_version=`make PYTHON_VERSION` >> $GITHUB_ENV
-
-      - name: Install python ${{ env.py_version }}
-        uses: actions/setup-python@v2
+      - uses: ./.github/actions/run_awx_devel
+        id: awx
        with:
-          python-version: ${{ env.py_version }}
-
-      - name: Install system deps
-        run: sudo apt-get install -y gettext
-
-      - name: Log in to registry
-        run: |
-          echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
-
-      - name: Pre-pull image to warm build cache
-        run: |
-          docker pull ghcr.io/${{ github.repository_owner }}/awx_devel:${{ github.base_ref }}
-
-      - name: Build UI
-        run: |
-          DEV_DOCKER_TAG_BASE=ghcr.io/${{ github.repository_owner }} COMPOSE_TAG=${{ github.base_ref }} make ui-devel
-
-      - name: Start AWX
-        run: |
-          DEV_DOCKER_TAG_BASE=ghcr.io/${{ github.repository_owner }} COMPOSE_TAG=${{ github.base_ref }} make docker-compose &> make-docker-compose-output.log &
+          build-ui: true
+          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Pull awx_cypress_base image
        run: |
          docker pull quay.io/awx/awx_cypress_base:latest

      - name: Checkout test project
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
        with:
          repository: ${{ github.repository_owner }}/tower-qa
          ssh-key: ${{ secrets.QA_REPO_KEY }}
@@ -65,18 +44,6 @@ jobs:
          cd ${{ secrets.E2E_PROJECT }}/ui-tests/awx-pf-tests
          docker build -t awx-pf-tests .

-      - name: Update default AWX password
-        run: |
-          while [[ "$(curl -s -o /dev/null -w ''%{http_code}'' -k https://localhost:8043/api/v2/ping/)" != "200" ]]
-          do
-          echo "Waiting for AWX..."
-          sleep 5;
-          done
-          echo "AWX is up, updating the password..."
-          docker exec -i tools_awx_1 sh <<-EOSH
-            awx-manage update_password --username=admin --password=password
-          EOSH
-
      - name: Run E2E tests
        env:
          CYPRESS_RECORD_KEY: ${{ secrets.CYPRESS_RECORD_KEY }}
@@ -86,7 +53,7 @@ jobs:
          export COMMIT_INFO_SHA=$GITHUB_SHA
          export COMMIT_INFO_REMOTE=$GITHUB_REPOSITORY_OWNER
          cd ${{ secrets.E2E_PROJECT }}/ui-tests/awx-pf-tests
-          AWX_IP=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' tools_awx_1)
+          AWX_IP=${{ steps.awx.outputs.ip }}
          printenv > .env
          echo "Executing tests:"
          docker run \
@@ -102,8 +69,7 @@ jobs:
          -w /e2e \
          awx-pf-tests run --project .

-      - name: Save AWX logs
-        uses: actions/upload-artifact@v2
+      - uses: ./.github/actions/upload_awx_devel_logs
+        if: always()
        with:
-          name: AWX-logs-${{ matrix.job }}
-          path: make-docker-compose-output.log
+          log-filename: e2e-${{ matrix.job }}.log
--- a/.github/workflows/label_issue.yml
+++ b/.github/workflows/label_issue.yml
@@ -17,7 +17,7 @@ jobs:

    steps:
      - name: Label Issue
-        uses: github/issue-labeler@v2.4.1
+        uses: github/issue-labeler@v3.1
        with:
          repo-token: "${{ secrets.GITHUB_TOKEN }}"
          not-before: 2021-12-07T07:00:00Z
@@ -28,7 +28,7 @@ jobs:
    runs-on: ubuntu-latest
    name: Label Issue - Community
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
      - name: Install python requests
        run: pip install requests
--- a/.github/workflows/label_pr.yml
+++ b/.github/workflows/label_pr.yml
@@ -27,7 +27,7 @@ jobs:
    runs-on: ubuntu-latest
    name: Label PR - Community
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
      - name: Install python requests
        run: pip install requests
--- a/.github/workflows/pr_body_check.yml
+++ b/.github/workflows/pr_body_check.yml
@@ -7,6 +7,7 @@ on:
    types: [opened, edited, reopened, synchronize]
 jobs:
  pr-check:
+    if: github.repository_owner == 'ansible' && endsWith(github.repository, 'awx')
    name: Scan PR description for semantic versioning keywords
    runs-on: ubuntu-latest
    permissions:
--- a/.github/workflows/promote.yml
+++ b/.github/workflows/promote.yml
@@ -17,13 +17,13 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout awx
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3

      - name: Get python version from Makefile
        run: echo py_version=`make PYTHON_VERSION` >> $GITHUB_ENV

      - name: Install python ${{ env.py_version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.py_version }}

@@ -40,8 +40,12 @@ jobs:
        if: ${{ github.repository_owner != 'ansible' }}

      - name: Build collection and publish to galaxy
+        env:
+          COLLECTION_NAMESPACE: ${{ env.collection_namespace }}
+          COLLECTION_VERSION: ${{ github.event.release.tag_name }}
+          COLLECTION_TEMPLATE_VERSION: true
        run: |
-          COLLECTION_TEMPLATE_VERSION=true COLLECTION_NAMESPACE=${{ env.collection_namespace }} make build_collection
+          make build_collection
          if [ "$(curl --head -sw '%{http_code}' https://galaxy.ansible.com/download/${{ env.collection_namespace }}-awx-${{ github.event.release.tag_name }}.tar.gz | tail -1)" == "302" ] ; then \
              echo "Galaxy release already done"; \
          else \
--- a/.github/workflows/stage.yml
+++ b/.github/workflows/stage.yml
@@ -44,7 +44,7 @@ jobs:
          exit 0

      - name: Checkout awx
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
        with:
          path: awx

@@ -52,18 +52,18 @@ jobs:
        run: echo py_version=`make PYTHON_VERSION` >> $GITHUB_ENV

      - name: Install python ${{ env.py_version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.py_version }}

      - name: Checkout awx-logos
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
        with:
          repository: ansible/awx-logos
          path: awx-logos

      - name: Checkout awx-operator
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
        with:
          repository: ${{ github.repository_owner }}/awx-operator
          path: awx-operator
--- a/.github/workflows/upload_schema.yml
+++ b/.github/workflows/upload_schema.yml
@@ -17,13 +17,13 @@ jobs:
      packages: write
      contents: read
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3

      - name: Get python version from Makefile
        run: echo py_version=`make PYTHON_VERSION` >> $GITHUB_ENV

      - name: Install python ${{ env.py_version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.py_version }}       

--- a/.gitignore
+++ b/.gitignore
@@ -165,3 +165,7 @@ use_dev_supervisor.txt

 awx/ui_next/src
 awx/ui_next/build
+
+# Docs build stuff
+docs/docsite/build/
+_readthedocs/
--- a/.gitleaks.toml
+++ b/.gitleaks.toml
@@ -0,0 +1,5 @@
+[allowlist]
+description = "Documentation contains example secrets and passwords"
+paths = [
+  "docs/docsite/rst/administration/oauth2_token_auth.rst",
+]
--- a/.pip-tools.toml
+++ b/.pip-tools.toml
@@ -0,0 +1,5 @@
+[tool.pip-tools]
+resolver = "backtracking"
+allow-unsafe = true
+strip-extras = true
+quiet = true
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -0,0 +1,15 @@
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+version: 2
+
+build:
+  os: ubuntu-22.04
+  tools:
+    python: >-
+      3.11
+  commands:
+    - pip install --user tox
+    - python3 -m tox -e docs
+    - mkdir -p _readthedocs/html/
+    - mv docs/docsite/build/html/* _readthedocs/html/
--- a/.yamllint
+++ b/.yamllint
@@ -10,6 +10,7 @@ ignore: |
  tools/docker-compose/_sources
  # django template files
  awx/api/templates/instance_install_bundle/**
+  .readthedocs.yaml

 extends: default

--- a/DATA_MIGRATION.md
+++ b/DATA_MIGRATION.md
@@ -4,6 +4,6 @@

 Early versions of AWX did not support seamless upgrades between major versions and required the use of a backup and restore tool to perform upgrades.

-Users who wish to upgrade modern AWX installations should follow the instructions at:
+As of version 18.0, `awx-operator` is the preferred install/upgrade method. Users who wish to upgrade modern AWX installations should follow the instructions at:

-https://github.com/ansible/awx/blob/devel/INSTALL.md#upgrading-from-previous-versions
+https://github.com/ansible/awx-operator/blob/devel/docs/upgrade/upgrading.md
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,12 @@
 -include awx/ui_next/Makefile

 PYTHON := $(notdir $(shell for i in python3.9 python3; do command -v $$i; done|sed 1q))
+SHELL := bash
 DOCKER_COMPOSE ?= docker-compose
 OFFICIAL ?= no
 NODE ?= node
 NPM_BIN ?= npm
+KIND_BIN ?= $(shell which kind)
 CHROMIUM_BIN=/tmp/chrome-linux/chrome
 GIT_BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD)
 MANAGEMENT_COMMAND ?= awx-manage
@@ -27,6 +29,8 @@ COLLECTION_TEMPLATE_VERSION ?= false
 # NOTE: This defaults the container image version to the branch that's active
 COMPOSE_TAG ?= $(GIT_BRANCH)
 MAIN_NODE_TYPE ?= hybrid
+# If set to true docker-compose will also start a pgbouncer instance and use it
+PGBOUNCER ?= false
 # If set to true docker-compose will also start a keycloak instance
 KEYCLOAK ?= false
 # If set to true docker-compose will also start an ldap instance
@@ -37,6 +41,8 @@ SPLUNK ?= false
 PROMETHEUS ?= false
 # If set to true docker-compose will also start a grafana instance
 GRAFANA ?= false
+# If set to true docker-compose will also start a hashicorp vault instance
+VAULT ?= false
 # If set to true docker-compose will also start a tacacs+ instance
 TACACS ?= false

@@ -73,7 +79,7 @@ I18N_FLAG_FILE = .i18n_built
 	sdist \
 	ui-release ui-devel \
 	VERSION PYTHON_VERSION docker-compose-sources \
-	.git/hooks/pre-commit github_ci_setup github_ci_runner
+	.git/hooks/pre-commit

 clean-tmp:
 	rm -rf tmp/
@@ -318,21 +324,10 @@ test:
 	cd awxkit && $(VENV_BASE)/awx/bin/tox -re py3
 	awx-manage check_migrations --dry-run --check  -n 'missing_migration_file'

-## Login to Github container image registry, pull image, then build image.
-github_ci_setup:
-	# GITHUB_ACTOR is automatic github actions env var
-	# CI_GITHUB_TOKEN is defined in .github files
-	echo $(CI_GITHUB_TOKEN) | docker login ghcr.io -u $(GITHUB_ACTOR) --password-stdin
-	docker pull $(DEVEL_IMAGE_NAME) || :  # Pre-pull image to warm build cache
-	$(MAKE) docker-compose-build
-
 ## Runs AWX_DOCKER_CMD inside a new docker container.
 docker-runner:
 	docker run -u $(shell id -u) --rm -v $(shell pwd):/awx_devel/:Z --workdir=/awx_devel $(DEVEL_IMAGE_NAME) $(AWX_DOCKER_CMD)

-## Builds image and runs AWX_DOCKER_CMD in it, mainly for .github checks.
-github_ci_runner: github_ci_setup docker-runner
-
 test_collection:
 	rm -f $(shell ls -d $(VENV_BASE)/awx/lib/python* | head -n 1)/no-global-site-packages.txt
 	if [ "$(VENV_BASE)" ]; then \
@@ -378,7 +373,7 @@ test_collection_sanity:
 	cd $(COLLECTION_INSTALL) && ansible-test sanity $(COLLECTION_SANITY_ARGS)

 test_collection_integration: install_collection
-	cd $(COLLECTION_INSTALL) && ansible-test integration $(COLLECTION_TEST_TARGET)
+	cd $(COLLECTION_INSTALL) && ansible-test integration -vvv $(COLLECTION_TEST_TARGET)

 test_unit:
 	@if [ "$(VENV_BASE)" ]; then \
@@ -520,15 +515,20 @@ docker-compose-sources: .git/hooks/pre-commit
 	    -e control_plane_node_count=$(CONTROL_PLANE_NODE_COUNT) \
 	    -e execution_node_count=$(EXECUTION_NODE_COUNT) \
 	    -e minikube_container_group=$(MINIKUBE_CONTAINER_GROUP) \
+	    -e enable_pgbouncer=$(PGBOUNCER) \
 	    -e enable_keycloak=$(KEYCLOAK) \
 	    -e enable_ldap=$(LDAP) \
 	    -e enable_splunk=$(SPLUNK) \
 	    -e enable_prometheus=$(PROMETHEUS) \
 	    -e enable_grafana=$(GRAFANA) \
+	    -e enable_vault=$(VAULT) \
 	    -e enable_tacacs=$(TACACS) \
            $(EXTRA_SOURCES_ANSIBLE_OPTS)

 docker-compose: awx/projects docker-compose-sources
+	ansible-galaxy install --ignore-certs -r tools/docker-compose/ansible/requirements.yml;
+	ansible-playbook -i tools/docker-compose/inventory tools/docker-compose/ansible/initialize_containers.yml \
+	  -e enable_vault=$(VAULT);
 	$(DOCKER_COMPOSE) -f tools/docker-compose/_sources/docker-compose.yml $(COMPOSE_OPTS) up $(COMPOSE_UP_OPTS) --remove-orphans

 docker-compose-credential-plugins: awx/projects docker-compose-sources
@@ -580,7 +580,7 @@ docker-clean:
 	-$(foreach image_id,$(shell docker images --filter=reference='*/*/*awx_devel*' --filter=reference='*/*awx_devel*' --filter=reference='*awx_devel*' -aq),docker rmi --force $(image_id);)

 docker-clean-volumes: docker-compose-clean docker-compose-container-group-clean
-	docker volume rm -f tools_awx_db tools_grafana_storage tools_prometheus_storage $(docker volume ls --filter name=tools_redis_socket_ -q)
+	docker volume rm -f tools_awx_db tools_vault_1 tools_grafana_storage tools_prometheus_storage $$(docker volume ls --filter name=tools_redis_socket_ -q)  # escaped so the shell, not make, runs the substitution

 docker-refresh: docker-clean docker-compose

@@ -654,6 +654,9 @@ awx-kube-dev-build: Dockerfile.kube-dev
 	    -t $(DEV_DOCKER_TAG_BASE)/awx_kube_devel:$(COMPOSE_TAG) .


+kind-dev-load: awx-kube-dev-build
+	$(KIND_BIN) load docker-image $(DEV_DOCKER_TAG_BASE)/awx_kube_devel:$(COMPOSE_TAG)
+
 # Translation TASKS
 # --------------------------------------

--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 [![CI](https://github.com/ansible/awx/actions/workflows/ci.yml/badge.svg?branch=devel)](https://github.com/ansible/awx/actions/workflows/ci.yml) [![Code of Conduct](https://img.shields.io/badge/code%20of%20conduct-Ansible-yellow.svg)](https://docs.ansible.com/ansible/latest/community/code_of_conduct.html) [![Apache v2 License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](https://github.com/ansible/awx/blob/devel/LICENSE.md) [![AWX Mailing List](https://img.shields.io/badge/mailing%20list-AWX-orange.svg)](https://groups.google.com/g/awx-project)
-[![IRC Chat - #ansible-awx](https://img.shields.io/badge/IRC-%23ansible--awx-blueviolet.svg)](https://libera.chat)
+[![Ansible Matrix](https://img.shields.io/badge/matrix-Ansible%20Community-blueviolet.svg?logo=matrix)](https://chat.ansible.im/#/welcome) [![Ansible Discourse](https://img.shields.io/badge/discourse-Ansible%20Community-yellowgreen.svg?logo=discourse)](https://forum.ansible.com)

 <img src="https://raw.githubusercontent.com/ansible/awx-logos/master/awx/ui/client/assets/logo-login.svg?sanitize=true" width=200 alt="AWX" />

@@ -30,12 +30,12 @@ If you're experiencing a problem that you feel is a bug in AWX or have ideas for
 Code of Conduct
 ---------------

-We ask all of our community members and contributors to adhere to the [Ansible code of conduct](http://docs.ansible.com/ansible/latest/community/code_of_conduct.html). If you have questions or need assistance, please reach out to our community team at [codeofconduct@ansible.com](mailto:codeofconduct@ansible.com)   
+We ask all of our community members and contributors to adhere to the [Ansible code of conduct](http://docs.ansible.com/ansible/latest/community/code_of_conduct.html). If you have questions or need assistance, please reach out to our community team at [codeofconduct@ansible.com](mailto:codeofconduct@ansible.com)

 Get Involved
 ------------

 We welcome your feedback and ideas. Here's how to reach us with feedback and questions:

- Join the `#ansible-awx` channel on irc.libera.chat
- Join the [mailing list](https://groups.google.com/forum/#!forum/awx-project) 
+- Join the [Ansible AWX channel on Matrix](https://matrix.to/#/#awx:ansible.com)
+- Join the [Ansible Community Forum](https://forum.ansible.com)
--- a/awx/__init__.py
+++ b/awx/__init__.py
@@ -52,39 +52,14 @@ try:
 except ImportError:  # pragma: no cover
    MODE = 'production'

-import hashlib

 try:
    import django  # noqa: F401
-
-    HAS_DJANGO = True
 except ImportError:
-    HAS_DJANGO = False
+    pass
 else:
-    from django.db.backends.base import schema
-    from django.db.models import indexes
-    from django.db.backends.utils import names_digest
    from django.db import connection

-if HAS_DJANGO is True:
-    # See upgrade blocker note in requirements/README.md
-    try:
-        names_digest('foo', 'bar', 'baz', length=8)
-    except ValueError:
-
-        def names_digest(*args, length):
-            """
-            Generate a 32-bit digest of a set of arguments that can be used to shorten
-            identifying names.  Support for use in FIPS environments.
-            """
-            h = hashlib.md5(usedforsecurity=False)
-            for arg in args:
-                h.update(arg.encode())
-            return h.hexdigest()[:length]
-
-        schema.names_digest = names_digest
-        indexes.names_digest = names_digest
-

 def find_commands(management_dir):
    # Modified version of function from django/core/management/__init__.py.
--- a/awx/api/generics.py
+++ b/awx/api/generics.py
@@ -232,7 +232,8 @@ class APIView(views.APIView):

        response = super(APIView, self).finalize_response(request, response, *args, **kwargs)
        time_started = getattr(self, 'time_started', None)
-        response['X-API-Product-Version'] = get_awx_version()
+        if request.user.is_authenticated:
+            response['X-API-Product-Version'] = get_awx_version()
        response['X-API-Product-Name'] = server_product_name()

        response['X-API-Node'] = settings.CLUSTER_HOST_ID
--- a/awx/api/serializers.py
+++ b/awx/api/serializers.py
@@ -1629,8 +1629,8 @@ class ProjectUpdateDetailSerializer(ProjectUpdateSerializer):
        fields = ('*', 'host_status_counts', 'playbook_counts')

    def get_playbook_counts(self, obj):
-        task_count = obj.project_update_events.filter(event='playbook_on_task_start').count()
-        play_count = obj.project_update_events.filter(event='playbook_on_play_start').count()
+        task_count = obj.get_event_queryset().filter(event='playbook_on_task_start').count()
+        play_count = obj.get_event_queryset().filter(event='playbook_on_play_start').count()

        data = {'play_count': play_count, 'task_count': task_count}

@@ -3233,7 +3233,7 @@ class JobTemplateSerializer(JobTemplateMixin, UnifiedJobTemplateSerializer, JobO
        if get_field_from_model_or_attrs('host_config_key') and not inventory:
            raise serializers.ValidationError({'host_config_key': _("Cannot enable provisioning callback without an inventory set.")})

-        prompting_error_message = _("Must either set a default value or ask to prompt on launch.")
+        prompting_error_message = _("You must either set a default value or ask to prompt on launch.")
        if project is None:
            raise serializers.ValidationError({'project': _("Job Templates must have a project assigned.")})
        elif inventory is None and not get_field_from_model_or_attrs('ask_inventory_on_launch'):
@@ -5356,10 +5356,16 @@ class ScheduleSerializer(LaunchConfigurationBaseSerializer, SchedulePreviewSeria
 class InstanceLinkSerializer(BaseSerializer):
    class Meta:
        model = InstanceLink
-        fields = ('source', 'target', 'link_state')
+        fields = ('id', 'url', 'related', 'source', 'target', 'link_state')

-    source = serializers.SlugRelatedField(slug_field="hostname", read_only=True)
-    target = serializers.SlugRelatedField(slug_field="hostname", read_only=True)
+    source = serializers.SlugRelatedField(slug_field="hostname", queryset=Instance.objects.all())
+    target = serializers.SlugRelatedField(slug_field="hostname", queryset=Instance.objects.all())
+
+    def get_related(self, obj):
+        res = super(InstanceLinkSerializer, self).get_related(obj)
+        res['source_instance'] = self.reverse('api:instance_detail', kwargs={'pk': obj.source.id})
+        res['target_instance'] = self.reverse('api:instance_detail', kwargs={'pk': obj.target.id})
+        return res


 class InstanceNodeSerializer(BaseSerializer):
@@ -5376,6 +5382,7 @@ class InstanceSerializer(BaseSerializer):
    jobs_running = serializers.IntegerField(help_text=_('Count of jobs in the running or waiting state that are targeted for this instance'), read_only=True)
    jobs_total = serializers.IntegerField(help_text=_('Count of all jobs that target this instance'), read_only=True)
    health_check_pending = serializers.SerializerMethodField()
+    peers = serializers.SlugRelatedField(many=True, required=False, slug_field="hostname", queryset=Instance.objects.all())

    class Meta:
        model = Instance
@@ -5412,6 +5419,8 @@ class InstanceSerializer(BaseSerializer):
            'node_state',
            'ip_address',
            'listener_port',
+            'peers',
+            'peers_from_control_nodes',
        )
        extra_kwargs = {
            'node_type': {'initial': Instance.Types.EXECUTION, 'default': Instance.Types.EXECUTION},
@@ -5464,22 +5473,57 @@ class InstanceSerializer(BaseSerializer):
    def get_health_check_pending(self, obj):
        return obj.health_check_pending

-    def validate(self, data):
-        if self.instance:
-            if self.instance.node_type == Instance.Types.HOP:
-                raise serializers.ValidationError("Hop node instances may not be changed.")
-        else:
-            if not settings.IS_K8S:
-                raise serializers.ValidationError("Can only create instances on Kubernetes or OpenShift.")
-        return data
+    def validate(self, attrs):
+        def get_field_from_model_or_attrs(fd):
+            return attrs.get(fd, self.instance and getattr(self.instance, fd) or None)
+
+        def check_peers_changed():
+            '''
+            Return a truthy value only when updating an existing instance,
+            'peers' is present in attrs, and the requested peers differ
+            from the instance's current set of peers.
+            '''
+            return self.instance and 'peers' in attrs and set(self.instance.peers.all()) != set(attrs['peers'])
+
+        if not self.instance and not settings.IS_K8S:
+            raise serializers.ValidationError(_("Can only create instances on Kubernetes or OpenShift."))
+
+        node_type = get_field_from_model_or_attrs("node_type")
+        peers_from_control_nodes = get_field_from_model_or_attrs("peers_from_control_nodes")
+        listener_port = get_field_from_model_or_attrs("listener_port")
+        peers = attrs.get('peers', [])
+
+        if peers_from_control_nodes and node_type not in (Instance.Types.EXECUTION, Instance.Types.HOP):
+            raise serializers.ValidationError(_("peers_from_control_nodes can only be enabled for execution or hop nodes."))
+
+        if node_type in [Instance.Types.CONTROL, Instance.Types.HYBRID]:
+            if check_peers_changed():
+                raise serializers.ValidationError(
+                    _("Setting peers manually for control nodes is not allowed. Enable peers_from_control_nodes on the hop and execution nodes instead.")
+                )
+
+        if not listener_port and peers_from_control_nodes:
+            raise serializers.ValidationError(_("Field listener_port must be a valid integer when peers_from_control_nodes is enabled."))
+
+        if not listener_port and self.instance and self.instance.peers_from.exists():
+            raise serializers.ValidationError(_("Field listener_port must be a valid integer when other nodes peer to it."))
+
+        for peer in peers:
+            if peer.listener_port is None:
+                raise serializers.ValidationError(_("Field listener_port must be set on peer ") + peer.hostname + ".")
+
+        if not settings.IS_K8S:
+            if check_peers_changed():
+                raise serializers.ValidationError(_("Cannot change peers."))
+
+        return super().validate(attrs)

    def validate_node_type(self, value):
-        if not self.instance:
-            if value not in (Instance.Types.EXECUTION,):
-                raise serializers.ValidationError("Can only create execution nodes.")
-        else:
-            if self.instance.node_type != value:
-                raise serializers.ValidationError("Cannot change node type.")
+        if not self.instance and value not in [Instance.Types.HOP, Instance.Types.EXECUTION]:
+            raise serializers.ValidationError(_("Can only create execution or hop nodes."))
+
+        if self.instance and self.instance.node_type != value:
+            raise serializers.ValidationError(_("Cannot change node type."))

        return value

@@ -5487,30 +5531,41 @@ class InstanceSerializer(BaseSerializer):
        if self.instance:
            if value != self.instance.node_state:
                if not settings.IS_K8S:
-                    raise serializers.ValidationError("Can only change the state on Kubernetes or OpenShift.")
+                    raise serializers.ValidationError(_("Can only change the state on Kubernetes or OpenShift."))
                if value != Instance.States.DEPROVISIONING:
-                    raise serializers.ValidationError("Can only change instances to the 'deprovisioning' state.")
-                if self.instance.node_type not in (Instance.Types.EXECUTION,):
-                    raise serializers.ValidationError("Can only deprovision execution nodes.")
+                    raise serializers.ValidationError(_("Can only change instances to the 'deprovisioning' state."))
+                if self.instance.node_type not in (Instance.Types.EXECUTION, Instance.Types.HOP):
+                    raise serializers.ValidationError(_("Can only deprovision execution or hop nodes."))
        else:
            if value and value != Instance.States.INSTALLED:
-                raise serializers.ValidationError("Can only create instances in the 'installed' state.")
+                raise serializers.ValidationError(_("Can only create instances in the 'installed' state."))

        return value

    def validate_hostname(self, value):
        """
-        - Hostname cannot be "localhost" - but can be something like localhost.domain
-        - Cannot change the hostname of an-already instantiated & initialized Instance object
+        Cannot change the hostname
        """
        if self.instance and self.instance.hostname != value:
-            raise serializers.ValidationError("Cannot change hostname.")
+            raise serializers.ValidationError(_("Cannot change hostname."))

        return value

    def validate_listener_port(self, value):
-        if self.instance and self.instance.listener_port != value:
-            raise serializers.ValidationError("Cannot change listener port.")
+        """
+        Cannot change listener port, unless going from None to an integer or vice versa
+        """
+        if value and self.instance and self.instance.listener_port and self.instance.listener_port != value:
+            raise serializers.ValidationError(_("Cannot change listener port."))
+
+        return value
+
+    def validate_peers_from_control_nodes(self, value):
+        """
+        Can only enable for K8S based deployments
+        """
+        if value and not settings.IS_K8S:
+            raise serializers.ValidationError(_("Can only be enabled on Kubernetes or Openshift."))

        return value

@@ -5518,7 +5573,19 @@ class InstanceSerializer(BaseSerializer):
 class InstanceHealthCheckSerializer(BaseSerializer):
    class Meta:
        model = Instance
-        read_only_fields = ('uuid', 'hostname', 'version', 'last_health_check', 'errors', 'cpu', 'memory', 'cpu_capacity', 'mem_capacity', 'capacity')
+        read_only_fields = (
+            'uuid',
+            'hostname',
+            'ip_address',
+            'version',
+            'last_health_check',
+            'errors',
+            'cpu',
+            'memory',
+            'cpu_capacity',
+            'mem_capacity',
+            'capacity',
+        )
        fields = read_only_fields


--- a/awx/api/templates/instance_install_bundle/group_vars/all.yml
+++ b/awx/api/templates/instance_install_bundle/group_vars/all.yml
@@ -3,21 +3,35 @@ receptor_group: awx
 receptor_verify: true
 receptor_tls: true
 receptor_mintls13: false
+{% if instance.node_type == "execution" %}
 receptor_work_commands:
  ansible-runner:
    command: ansible-runner
    params: worker
    allowruntimeparams: true
    verifysignature: true
+additional_python_packages:
+  - ansible-runner
+{% endif %}
 custom_worksign_public_keyfile: receptor/work_public_key.pem
 custom_tls_certfile: receptor/tls/receptor.crt
 custom_tls_keyfile: receptor/tls/receptor.key
-custom_ca_certfile: receptor/tls/ca/receptor-ca.crt
+custom_ca_certfile: receptor/tls/ca/mesh-CA.crt
 receptor_protocol: 'tcp'
+{% if instance.listener_port %}
 receptor_listener: true
 receptor_port: {{ instance.listener_port }}
-receptor_dependencies:
-  - python39-pip
+{% else %}
+receptor_listener: false
+{% endif %}
+{% if peers %}
+receptor_peers:
+{% for peer in peers %}
+  - host: {{ peer.host }}
+    port: {{ peer.port }}
+    protocol: tcp
+{% endfor %}
+{% endif %}
 {% verbatim %}
 podman_user: "{{ receptor_user }}"
 podman_group: "{{ receptor_group }}"
--- a/awx/api/templates/instance_install_bundle/install_receptor.yml
+++ b/awx/api/templates/instance_install_bundle/install_receptor.yml
@@ -1,20 +1,16 @@
-{% verbatim %}
 ---
 - hosts: all
  become: yes
  tasks:
    - name: Create the receptor user
      user:
+{% verbatim %}
        name: "{{ receptor_user }}"
+{% endverbatim %}
        shell: /bin/bash
-    - name: Enable Copr repo for Receptor
-      command: dnf copr enable ansible-awx/receptor -y
+{% if instance.node_type == "execution" %}
    - import_role:
        name: ansible.receptor.podman
+{% endif %}
    - import_role:
        name: ansible.receptor.setup
-    - name: Install ansible-runner
-      pip:
-        name: ansible-runner
-        executable: pip3.9
-{% endverbatim %}
--- a/awx/api/templates/instance_install_bundle/requirements.yml
+++ b/awx/api/templates/instance_install_bundle/requirements.yml
@@ -1,4 +1,4 @@
 ---
 collections:
  - name: ansible.receptor
-    version: 1.1.0
+    version: 2.0.2
--- a/awx/api/urls/urls.py
+++ b/awx/api/urls/urls.py
@@ -30,7 +30,7 @@ from awx.api.views import (
    OAuth2TokenList,
    ApplicationOAuth2TokenList,
    OAuth2ApplicationDetail,
-    # HostMetricSummaryMonthlyList, # It will be enabled in future version of the AWX
+    HostMetricSummaryMonthlyList,
 )

 from awx.api.views.bulk import (
@@ -123,8 +123,7 @@ v2_urls = [
    re_path(r'^constructed_inventories/', include(constructed_inventory_urls)),
    re_path(r'^hosts/', include(host_urls)),
    re_path(r'^host_metrics/', include(host_metric_urls)),
-    # It will be enabled in future version of the AWX
-    # re_path(r'^host_metric_summary_monthly/$', HostMetricSummaryMonthlyList.as_view(), name='host_metric_summary_monthly_list'),
+    re_path(r'^host_metric_summary_monthly/$', HostMetricSummaryMonthlyList.as_view(), name='host_metric_summary_monthly_list'),
    re_path(r'^groups/', include(group_urls)),
    re_path(r'^inventory_sources/', include(inventory_source_urls)),
    re_path(r'^inventory_updates/', include(inventory_update_urls)),
--- a/awx/api/views/init.py
+++ b/awx/api/views/init.py
@@ -341,17 +341,18 @@ class InstanceDetail(RetrieveUpdateAPIView):

    def update_raw_data(self, data):
        # these fields are only valid on creation of an instance, so they unwanted on detail view
-        data.pop('listener_port', None)
        data.pop('node_type', None)
        data.pop('hostname', None)
+        data.pop('ip_address', None)
        return super(InstanceDetail, self).update_raw_data(data)

    def update(self, request, *args, **kwargs):
        r = super(InstanceDetail, self).update(request, *args, **kwargs)
        if status.is_success(r.status_code):
            obj = self.get_object()
-            obj.set_capacity_value()
-            obj.save(update_fields=['capacity'])
+            capacity_changed = obj.set_capacity_value()
+            if capacity_changed:
+                obj.save(update_fields=['capacity'])
            r.data = serializers.InstanceSerializer(obj, context=self.get_serializer_context()).to_representation(obj)
        return r

@@ -1564,16 +1565,15 @@ class HostMetricDetail(RetrieveDestroyAPIView):
        return Response(status=status.HTTP_204_NO_CONTENT)


-# It will be enabled in future version of the AWX
-# class HostMetricSummaryMonthlyList(ListAPIView):
-#     name = _("Host Metrics Summary Monthly")
-#     model = models.HostMetricSummaryMonthly
-#     serializer_class = serializers.HostMetricSummaryMonthlySerializer
-#     permission_classes = (IsSystemAdminOrAuditor,)
-#     search_fields = ('date',)
-#
-#     def get_queryset(self):
-#         return self.model.objects.all()
+class HostMetricSummaryMonthlyList(ListAPIView):
+    name = _("Host Metrics Summary Monthly")
+    model = models.HostMetricSummaryMonthly
+    serializer_class = serializers.HostMetricSummaryMonthlySerializer
+    permission_classes = (IsSystemAdminOrAuditor,)
+    search_fields = ('date',)
+
+    def get_queryset(self):
+        return self.model.objects.all()


 class HostList(HostRelatedSearchMixin, ListCreateAPIView):
--- a/awx/api/views/bulk.py
+++ b/awx/api/views/bulk.py
@@ -1,5 +1,7 @@
 from collections import OrderedDict

+from django.utils.translation import gettext_lazy as _
+
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.renderers import JSONRenderer
 from rest_framework.reverse import reverse
@@ -18,6 +20,9 @@ from awx.api import (


 class BulkView(APIView):
+    name = _('Bulk')
+    swagger_topic = 'Bulk'
+
    permission_classes = [IsAuthenticated]
    renderer_classes = [
        renderers.BrowsableAPIRenderer,
--- a/awx/api/views/instance_install_bundle.py
+++ b/awx/api/views/instance_install_bundle.py
@@ -6,6 +6,8 @@ import io
 import ipaddress
 import os
 import tarfile
+import time
+import re

 import asn1
 from awx.api import serializers
@@ -40,6 +42,8 @@ RECEPTOR_OID = "1.3.6.1.4.1.2312.19.1"
 # │   │   └── receptor.key
 # │   └── work-public-key.pem
 # └── requirements.yml
+
+
 class InstanceInstallBundle(GenericAPIView):
    name = _('Install Bundle')
    model = models.Instance
@@ -49,9 +53,9 @@ class InstanceInstallBundle(GenericAPIView):
    def get(self, request, *args, **kwargs):
        instance_obj = self.get_object()

-        if instance_obj.node_type not in ('execution',):
+        if instance_obj.node_type not in ('execution', 'hop'):
            return Response(
-                data=dict(msg=_('Install bundle can only be generated for execution nodes.')),
+                data=dict(msg=_('Install bundle can only be generated for execution or hop nodes.')),
                status=status.HTTP_400_BAD_REQUEST,
            )

@@ -66,37 +70,37 @@ class InstanceInstallBundle(GenericAPIView):
                # generate and write the receptor key to receptor/tls/receptor.key in the tar file
                key, cert = generate_receptor_tls(instance_obj)

+                def tar_addfile(tarinfo, filecontent):
+                    tarinfo.mtime = time.time()
+                    tarinfo.size = len(filecontent)
+                    tar.addfile(tarinfo, io.BytesIO(filecontent))
+
                key_tarinfo = tarfile.TarInfo(f"{instance_obj.hostname}_install_bundle/receptor/tls/receptor.key")
-                key_tarinfo.size = len(key)
-                tar.addfile(key_tarinfo, io.BytesIO(key))
+                tar_addfile(key_tarinfo, key)

                cert_tarinfo = tarfile.TarInfo(f"{instance_obj.hostname}_install_bundle/receptor/tls/receptor.crt")
                cert_tarinfo.size = len(cert)
-                tar.addfile(cert_tarinfo, io.BytesIO(cert))
+                tar_addfile(cert_tarinfo, cert)

                # generate and write install_receptor.yml to the tar file
-                playbook = generate_playbook().encode('utf-8')
+                playbook = generate_playbook(instance_obj).encode('utf-8')
                playbook_tarinfo = tarfile.TarInfo(f"{instance_obj.hostname}_install_bundle/install_receptor.yml")
-                playbook_tarinfo.size = len(playbook)
-                tar.addfile(playbook_tarinfo, io.BytesIO(playbook))
+                tar_addfile(playbook_tarinfo, playbook)

                # generate and write inventory.yml to the tar file
                inventory_yml = generate_inventory_yml(instance_obj).encode('utf-8')
                inventory_yml_tarinfo = tarfile.TarInfo(f"{instance_obj.hostname}_install_bundle/inventory.yml")
-                inventory_yml_tarinfo.size = len(inventory_yml)
-                tar.addfile(inventory_yml_tarinfo, io.BytesIO(inventory_yml))
+                tar_addfile(inventory_yml_tarinfo, inventory_yml)

                # generate and write group_vars/all.yml to the tar file
                group_vars = generate_group_vars_all_yml(instance_obj).encode('utf-8')
                group_vars_tarinfo = tarfile.TarInfo(f"{instance_obj.hostname}_install_bundle/group_vars/all.yml")
-                group_vars_tarinfo.size = len(group_vars)
-                tar.addfile(group_vars_tarinfo, io.BytesIO(group_vars))
+                tar_addfile(group_vars_tarinfo, group_vars)

                # generate and write requirements.yml to the tar file
                requirements_yml = generate_requirements_yml().encode('utf-8')
                requirements_yml_tarinfo = tarfile.TarInfo(f"{instance_obj.hostname}_install_bundle/requirements.yml")
-                requirements_yml_tarinfo.size = len(requirements_yml)
-                tar.addfile(requirements_yml_tarinfo, io.BytesIO(requirements_yml))
+                tar_addfile(requirements_yml_tarinfo, requirements_yml)

            # respond with the tarfile
            f.seek(0)
@@ -105,8 +109,10 @@ class InstanceInstallBundle(GenericAPIView):
            return response


-def generate_playbook():
-    return render_to_string("instance_install_bundle/install_receptor.yml")
+def generate_playbook(instance_obj):
+    playbook_yaml = render_to_string("instance_install_bundle/install_receptor.yml", context=dict(instance=instance_obj))
+    # collapse consecutive newlines into a single newline
+    return re.sub(r'\n+', '\n', playbook_yaml)


 def generate_requirements_yml():
@@ -118,7 +124,12 @@ def generate_inventory_yml(instance_obj):


 def generate_group_vars_all_yml(instance_obj):
-    return render_to_string("instance_install_bundle/group_vars/all.yml", context=dict(instance=instance_obj))
+    peers = []
+    for instance in instance_obj.peers.all():
+        peers.append(dict(host=instance.hostname, port=instance.listener_port))
+    all_yaml = render_to_string("instance_install_bundle/group_vars/all.yml", context=dict(instance=instance_obj, peers=peers))
+    # collapse consecutive newlines into a single newline
+    return re.sub(r'\n+', '\n', all_yaml)


 def generate_receptor_tls(instance_obj):
--- a/awx/api/views/root.py
+++ b/awx/api/views/root.py
@@ -107,8 +107,7 @@ class ApiVersionRootView(APIView):
        data['groups'] = reverse('api:group_list', request=request)
        data['hosts'] = reverse('api:host_list', request=request)
        data['host_metrics'] = reverse('api:host_metric_list', request=request)
-        # It will be enabled in future version of the AWX
-        # data['host_metric_summary_monthly'] = reverse('api:host_metric_summary_monthly_list', request=request)
+        data['host_metric_summary_monthly'] = reverse('api:host_metric_summary_monthly_list', request=request)
        data['job_templates'] = reverse('api:job_template_list', request=request)
        data['jobs'] = reverse('api:job_list', request=request)
        data['ad_hoc_commands'] = reverse('api:ad_hoc_command_list', request=request)
--- a/awx/conf/apps.py
+++ b/awx/conf/apps.py
@@ -14,7 +14,7 @@ class ConfConfig(AppConfig):
    def ready(self):
        self.module.autodiscover()

-        if not set(sys.argv) & {'migrate', 'check_migrations'}:
+        if not set(sys.argv) & {'migrate', 'check_migrations', 'showmigrations'}:
            from .settings import SettingsWrapper

            SettingsWrapper.initialize()
--- a/awx/conf/settings.py
+++ b/awx/conf/settings.py
@@ -418,6 +418,10 @@ class SettingsWrapper(UserSettingsHolder):
        """Get value while accepting the in-memory cache if key is available"""
        with _ctit_db_wrapper(trans_safe=True):
            return self._get_local(name)
+        # If the line above did not return, we hit a database error.
+        # In that case we should not have a local cache value,
+        # so return empty as a signal to use the default.
+        return empty

    def __getattr__(self, name):
        value = empty
--- a/awx/conf/tests/unit/test_settings.py
+++ b/awx/conf/tests/unit/test_settings.py
@@ -13,6 +13,7 @@ from unittest import mock
 from django.conf import LazySettings
 from django.core.cache.backends.locmem import LocMemCache
 from django.core.exceptions import ImproperlyConfigured
+from django.db.utils import Error as DBError
 from django.utils.translation import gettext_lazy as _
 import pytest

@@ -331,3 +332,18 @@ def test_in_memory_cache_works(settings):
    with mock.patch.object(settings, '_get_local') as mock_get:
        assert settings.AWX_VAR == 'DEFAULT'
        mock_get.assert_not_called()
+
+
+@pytest.mark.defined_in_file(AWX_VAR=[])
+def test_getattr_with_database_error(settings):
+    """
+    If a setting is defined via the registry and has a null-ish default which is not None,
+    then referencing that setting during a database outage should give that default.
+    This is a regression test for a bug where it would return None.
+    """
+    settings.registry.register('AWX_VAR', field_class=fields.StringListField, default=[], category=_('System'), category_slug='system')
+    settings._awx_conf_memoizedcache.clear()
+
+    with mock.patch('django.db.backends.base.base.BaseDatabaseWrapper.ensure_connection') as mock_ensure:
+        mock_ensure.side_effect = DBError('for test')
+        assert settings.AWX_VAR == []
--- a/awx/main/access.py
+++ b/awx/main/access.py
@@ -366,9 +366,9 @@ class BaseAccess(object):
            report_violation = lambda message: None
        else:
            report_violation = lambda message: logger.warning(message)
-        if validation_info.get('trial', False) is True or validation_info['instance_count'] == 10:  # basic 10 license
+        if validation_info.get('trial', False) is True:

-            def report_violation(message):
+            def report_violation(message):  # noqa
                raise PermissionDenied(message)

        if check_expiration and validation_info.get('time_remaining', None) is None:
--- a/awx/main/analytics/collectors.py
+++ b/awx/main/analytics/collectors.py
@@ -613,3 +613,20 @@ def host_metric_table(since, full_path, until, **kwargs):
        since.isoformat(), until.isoformat(), since.isoformat(), until.isoformat()
    )
    return _copy_table(table='host_metric', query=host_metric_query, path=full_path)
+
+
+@register('host_metric_summary_monthly_table', '1.0', format='csv', description=_('HostMetricSummaryMonthly export, full sync'), expensive=trivial_slicing)
+def host_metric_summary_monthly_table(since, full_path, **kwargs):
+    query = '''
+    COPY (SELECT main_hostmetricsummarymonthly.id,
+                 main_hostmetricsummarymonthly.date,
+                 main_hostmetricsummarymonthly.license_capacity,
+                 main_hostmetricsummarymonthly.license_consumed,
+                 main_hostmetricsummarymonthly.hosts_added,
+                 main_hostmetricsummarymonthly.hosts_deleted,
+                 main_hostmetricsummarymonthly.indirectly_managed_hosts
+          FROM main_hostmetricsummarymonthly
+          ORDER BY main_hostmetricsummarymonthly.id ASC) TO STDOUT WITH CSV HEADER
+    '''
+
+    return _copy_table(table='host_metric_summary_monthly', query=query, path=full_path)
--- a/awx/main/cache.py
+++ b/awx/main/cache.py
@@ -0,0 +1,87 @@
+import functools
+
+from django.conf import settings
+from django.core.cache.backends.base import DEFAULT_TIMEOUT
+from django.core.cache.backends.redis import RedisCache
+
+from redis.exceptions import ConnectionError, ResponseError, TimeoutError
+import socket
+
+# This list comes from what django-redis ignores and the behavior we are trying
+# to retain while dropping the dependency on django-redis.
+IGNORED_EXCEPTIONS = (TimeoutError, ResponseError, ConnectionError, socket.timeout)
+
+CONNECTION_INTERRUPTED_SENTINEL = object()
+
+
+def optionally_ignore_exceptions(func=None, return_value=None):
+    if func is None:
+        return functools.partial(optionally_ignore_exceptions, return_value=return_value)
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except IGNORED_EXCEPTIONS as e:
+            if settings.DJANGO_REDIS_IGNORE_EXCEPTIONS:
+                return return_value
+            raise e.__cause__ or e
+
+    return wrapper
+
+
+class AWXRedisCache(RedisCache):
+    """
+    We just want to wrap the upstream RedisCache class so that we can ignore
+    the exceptions that it raises when the cache is unavailable.
+    """
+
+    @optionally_ignore_exceptions
+    def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
+        return super().add(key, value, timeout, version)
+
+    @optionally_ignore_exceptions(return_value=CONNECTION_INTERRUPTED_SENTINEL)
+    def _get(self, key, default=None, version=None):
+        return super().get(key, default, version)
+
+    def get(self, key, default=None, version=None):
+        value = self._get(key, default, version)
+        if value is CONNECTION_INTERRUPTED_SENTINEL:
+            return default
+        return value
+
+    @optionally_ignore_exceptions
+    def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
+        return super().set(key, value, timeout, version)
+
+    @optionally_ignore_exceptions
+    def touch(self, key, timeout=DEFAULT_TIMEOUT, version=None):
+        return super().touch(key, timeout, version)
+
+    @optionally_ignore_exceptions
+    def delete(self, key, version=None):
+        return super().delete(key, version)
+
+    @optionally_ignore_exceptions
+    def get_many(self, keys, version=None):
+        return super().get_many(keys, version)
+
+    @optionally_ignore_exceptions
+    def has_key(self, key, version=None):
+        return super().has_key(key, version)
+
+    @optionally_ignore_exceptions
+    def incr(self, key, delta=1, version=None):
+        return super().incr(key, delta, version)
+
+    @optionally_ignore_exceptions
+    def set_many(self, data, timeout=DEFAULT_TIMEOUT, version=None):
+        return super().set_many(data, timeout, version)
+
+    @optionally_ignore_exceptions
+    def delete_many(self, keys, version=None):
+        return super().delete_many(keys, version)
+
+    @optionally_ignore_exceptions
+    def clear(self):
+        return super().clear()
--- a/awx/main/conf.py
+++ b/awx/main/conf.py
@@ -94,6 +94,20 @@ register(
    category_slug='system',
 )

+register(
+    'CSRF_TRUSTED_ORIGINS',
+    default=[],
+    field_class=fields.StringListField,
+    label=_('CSRF Trusted Origins List'),
+    help_text=_(
+        "If the service is behind a reverse proxy/load balancer, use this setting "
+        "to configure the schema://addresses from which the service should trust "
+        "Origin header values. "
+    ),
+    category=_('System'),
+    category_slug='system',
+)
+
 register(
    'LICENSE',
    field_class=fields.DictField,
@@ -680,16 +694,18 @@ register(
    category_slug='logging',
 )
 register(
-    'LOG_AGGREGATOR_MAX_DISK_USAGE_GB',
+    'LOG_AGGREGATOR_ACTION_QUEUE_SIZE',
    field_class=fields.IntegerField,
-    default=1,
+    default=131072,
    min_value=1,
-    label=_('Maximum disk persistence for external log aggregation (in GB)'),
+    label=_('Maximum number of messages that can be stored in the log action queue'),
    help_text=_(
-        'Amount of data to store (in gigabytes) during an outage of '
-        'the external log aggregator (defaults to 1). '
-        'Equivalent to the rsyslogd queue.maxdiskspace setting for main_queue. '
-        'Notably, this is used for the rsyslogd main queue (for input messages).'
+        'Defines how large the rsyslog action queue can grow in number of messages '
+        'stored. This can have an impact on memory utilization. When the queue '
+        'reaches 75% of this number, the queue will start writing to disk '
+        '(queue.highWatermark in rsyslog). When it reaches 90%, NOTICE, INFO, and '
+        'DEBUG messages will start to be discarded (queue.discardMark with '
+        'queue.discardSeverity=5).'
    ),
    category=_('Logging'),
    category_slug='logging',
@@ -704,8 +720,7 @@ register(
        'Amount of data to store (in gigabytes) if an rsyslog action takes time '
        'to process an incoming message (defaults to 1). '
        'Equivalent to the rsyslogd queue.maxdiskspace setting on the action (e.g. omhttp). '
-        'Like LOG_AGGREGATOR_MAX_DISK_USAGE_GB, it stores files in the directory specified '
-        'by LOG_AGGREGATOR_MAX_DISK_USAGE_PATH.'
+        'It stores files in the directory specified by LOG_AGGREGATOR_MAX_DISK_USAGE_PATH.'
    ),
    category=_('Logging'),
    category_slug='logging',
@@ -848,6 +863,15 @@ register(
    category_slug='system',
 )

+register(
+    'HOST_METRIC_SUMMARY_TASK_LAST_TS',
+    field_class=fields.DateTimeField,
+    label=_('Last computing date of HostMetricSummaryMonthly'),
+    allow_null=True,
+    category=_('System'),
+    category_slug='system',
+)
+
 register(
    'AWX_CLEANUP_PATHS',
    field_class=fields.BooleanField,
--- a/awx/main/credential_plugins/conjur.py
+++ b/awx/main/credential_plugins/conjur.py
@@ -4,6 +4,8 @@ from urllib.parse import urljoin, quote

 from django.utils.translation import gettext_lazy as _
 import requests
+import base64
+import binascii


 conjur_inputs = {
@@ -50,6 +52,13 @@ conjur_inputs = {
 }


+def _is_base64(s: str) -> bool:
+    try:
+        return base64.b64encode(base64.b64decode(s.encode("utf-8"))) == s.encode("utf-8")  # exact round-trip check
+    except binascii.Error:  # b64decode rejects malformed input such as bad padding
+        return False
+
+
 def conjur_backend(**kwargs):
    url = kwargs['url']
    api_key = kwargs['api_key']
@@ -77,7 +86,7 @@ def conjur_backend(**kwargs):
    token = resp.content.decode('utf-8')

    lookup_kwargs = {
-        'headers': {'Authorization': 'Token token="{}"'.format(token)},
+        'headers': {'Authorization': 'Token token="{}"'.format(token if _is_base64(token) else base64.b64encode(token.encode('utf-8')).decode('utf-8'))},
        'allow_redirects': False,
    }

--- a/awx/main/credential_plugins/dsv.py
+++ b/awx/main/credential_plugins/dsv.py
@@ -2,25 +2,28 @@ from .plugin import CredentialPlugin

 from django.conf import settings
 from django.utils.translation import gettext_lazy as _
-from thycotic.secrets.vault import SecretsVault
-
+from delinea.secrets.vault import PasswordGrantAuthorizer, SecretsVault

 dsv_inputs = {
    'fields': [
        {
            'id': 'tenant',
            'label': _('Tenant'),
-            'help_text': _('The tenant e.g. "ex" when the URL is https://ex.secretservercloud.com'),
+            'help_text': _('The tenant e.g. "ex" when the URL is https://ex.secretsvaultcloud.com'),
            'type': 'string',
        },
        {
            'id': 'tld',
            'label': _('Top-level Domain (TLD)'),
-            'help_text': _('The TLD of the tenant e.g. "com" when the URL is https://ex.secretservercloud.com'),
-            'choices': ['ca', 'com', 'com.au', 'com.sg', 'eu'],
+            'help_text': _('The TLD of the tenant e.g. "com" when the URL is https://ex.secretsvaultcloud.com'),
+            'choices': ['ca', 'com', 'com.au', 'eu'],
            'default': 'com',
        },
-        {'id': 'client_id', 'label': _('Client ID'), 'type': 'string'},
+        {
+            'id': 'client_id',
+            'label': _('Client ID'),
+            'type': 'string',
+        },
        {
            'id': 'client_secret',
            'label': _('Client Secret'),
@@ -51,12 +54,26 @@ if settings.DEBUG:
            'id': 'url_template',
            'label': _('URL template'),
            'type': 'string',
-            'default': 'https://{}.secretsvaultcloud.{}/v1',
+            'default': 'https://{}.secretsvaultcloud.{}',
        }
    )

-dsv_plugin = CredentialPlugin(
-    'Thycotic DevOps Secrets Vault',
-    dsv_inputs,
-    lambda **kwargs: SecretsVault(**{k: v for (k, v) in kwargs.items() if k in [field['id'] for field in dsv_inputs['fields']]}).get_secret(kwargs['path'])['data'][kwargs['secret_field']],  # fmt: skip
-)
+
+def dsv_backend(**kwargs):
+    tenant_name = kwargs['tenant']
+    tenant_tld = kwargs.get('tld', 'com')
+    tenant_url_template = kwargs.get('url_template', 'https://{}.secretsvaultcloud.{}')  # same default as the url_template input
+    client_id = kwargs['client_id']
+    client_secret = kwargs['client_secret']
+    secret_path = kwargs['path']
+    secret_field = kwargs['secret_field']
+
+    tenant_url = tenant_url_template.format(tenant_name, tenant_tld.strip("."))  # drop leading/trailing dots from the TLD
+
+    authorizer = PasswordGrantAuthorizer(tenant_url, client_id, client_secret)
+    dsv_secret = SecretsVault(tenant_url, authorizer).get_secret(secret_path)
+
+    return dsv_secret['data'][secret_field]  # KeyError if the fetched secret has no such field
+
+
+dsv_plugin = CredentialPlugin(name='Thycotic DevOps Secrets Vault', inputs=dsv_inputs, backend=dsv_backend)
--- a/awx/main/credential_plugins/hashivault.py
+++ b/awx/main/credential_plugins/hashivault.py
@@ -265,6 +265,8 @@ def kv_backend(**kwargs):

    if secret_key:
        try:
+            if (secret_key != 'data') and (secret_key not in json['data']) and ('data' in json['data']):
+                return json['data']['data'][secret_key]
            return json['data'][secret_key]
        except KeyError:
            raise RuntimeError('{} is not present at {}'.format(secret_key, secret_path))
--- a/awx/main/credential_plugins/tss.py
+++ b/awx/main/credential_plugins/tss.py
@@ -1,7 +1,10 @@
 from .plugin import CredentialPlugin
 from django.utils.translation import gettext_lazy as _

-from thycotic.secrets.server import DomainPasswordGrantAuthorizer, PasswordGrantAuthorizer, SecretServer, ServerSecret
+try:
+    from delinea.secrets.server import DomainPasswordGrantAuthorizer, PasswordGrantAuthorizer, SecretServer, ServerSecret
+except ImportError:
+    from thycotic.secrets.server import DomainPasswordGrantAuthorizer, PasswordGrantAuthorizer, SecretServer, ServerSecret

 tss_inputs = {
    'fields': [
@@ -51,7 +54,9 @@ tss_inputs = {

 def tss_backend(**kwargs):
    if kwargs.get("domain"):
-        authorizer = DomainPasswordGrantAuthorizer(kwargs['server_url'], kwargs['username'], kwargs['password'], kwargs['domain'])
+        authorizer = DomainPasswordGrantAuthorizer(
+            base_url=kwargs['server_url'], username=kwargs['username'], domain=kwargs['domain'], password=kwargs['password']
+        )
    else:
        authorizer = PasswordGrantAuthorizer(kwargs['server_url'], kwargs['username'], kwargs['password'])
    secret_server = SecretServer(kwargs['server_url'], authorizer)
--- a/awx/main/dispatch/init.py
+++ b/awx/main/dispatch/init.py
@@ -40,8 +40,12 @@ def get_task_queuename():


 class PubSub(object):
-    def __init__(self, conn):
+    def __init__(self, conn, select_timeout=None):
        self.conn = conn
+        if select_timeout is None:
+            self.select_timeout = 5
+        else:
+            self.select_timeout = select_timeout

    def listen(self, channel):
        with self.conn.cursor() as cur:
@@ -55,16 +59,33 @@ class PubSub(object):
        with self.conn.cursor() as cur:
            cur.execute('SELECT pg_notify(%s, %s);', (channel, payload))

-    def events(self, select_timeout=5, yield_timeouts=False):
+    @staticmethod
+    def current_notifies(conn):
+        """
+        Altered version of .notifies method from psycopg library
+        This removes the outer while True loop so that we only process
+        queued notifications; note it leans on private psycopg names (_NO_TRACEBACK, _encodings)
+        """
+        with conn.lock:
+            try:
+                ns = conn.wait(psycopg.generators.notifies(conn.pgconn))
+            except psycopg.errors._NO_TRACEBACK as ex:
+                raise ex.with_traceback(None)  # re-raise the same exception with its traceback cleared
+        enc = psycopg._encodings.pgconn_encoding(conn.pgconn)
+        for pgn in ns:
+            n = psycopg.connection.Notify(pgn.relname.decode(enc), pgn.extra.decode(enc), pgn.be_pid)
+            yield n
+
+    def events(self, yield_timeouts=False):
        if not self.conn.autocommit:
            raise RuntimeError('Listening for events can only be done in autocommit mode')

        while True:
-            if select.select([self.conn], [], [], select_timeout) == NOT_READY:
+            if select.select([self.conn], [], [], self.select_timeout) == NOT_READY:
                if yield_timeouts:
                    yield None
            else:
-                notification_generator = self.conn.notifies()
+                notification_generator = self.current_notifies(self.conn)
                for notification in notification_generator:
                    yield notification

@@ -73,7 +94,7 @@ class PubSub(object):


@contextmanager
-def pg_bus_conn(new_connection=False):
+def pg_bus_conn(new_connection=False, select_timeout=None):
    '''
    Any listeners probably want to establish a new database connection,
    separate from the Django connection used for queries, because that will prevent
@@ -98,7 +119,7 @@ def pg_bus_conn(new_connection=False):
            raise RuntimeError('Unexpectedly could not connect to postgres for pg_notify actions')
        conn = pg_connection.connection

-    pubsub = PubSub(conn)
+    pubsub = PubSub(conn, select_timeout=select_timeout)
    yield pubsub
    if new_connection:
        conn.close()
--- a/awx/main/dispatch/control.py
+++ b/awx/main/dispatch/control.py
@@ -37,8 +37,14 @@ class Control(object):
    def running(self, *args, **kwargs):
        return self.control_with_reply('running', *args, **kwargs)

-    def cancel(self, task_ids, *args, **kwargs):
-        return self.control_with_reply('cancel', *args, extra_data={'task_ids': task_ids}, **kwargs)
+    def cancel(self, task_ids, with_reply=True):
+        if with_reply:
+            return self.control_with_reply('cancel', extra_data={'task_ids': task_ids})
+        else:  # no reply queue is named (reply_to=None) and nothing is returned to the caller
+            self.control({'control': 'cancel', 'task_ids': task_ids, 'reply_to': None}, extra_data={'task_ids': task_ids})
+
+    def schedule(self, *args, **kwargs):
+        return self.control_with_reply('schedule', *args, **kwargs)

    @classmethod
    def generate_reply_queue_name(cls):
@@ -52,14 +58,14 @@ class Control(object):
        if not connection.get_autocommit():
            raise RuntimeError('Control-with-reply messages can only be done in autocommit mode')

-        with pg_bus_conn() as conn:
+        with pg_bus_conn(select_timeout=timeout) as conn:
            conn.listen(reply_queue)
            send_data = {'control': command, 'reply_to': reply_queue}
            if extra_data:
                send_data.update(extra_data)
            conn.notify(self.queuename, json.dumps(send_data))

-            for reply in conn.events(select_timeout=timeout, yield_timeouts=True):
+            for reply in conn.events(yield_timeouts=True):
                if reply is None:
                    logger.error(f'{self.service} did not reply within {timeout}s')
                    raise RuntimeError(f"{self.service} did not reply within {timeout}s")
--- a/awx/main/dispatch/periodic.py
+++ b/awx/main/dispatch/periodic.py
@@ -1,57 +1,142 @@
 import logging
-import os
 import time
-from multiprocessing import Process
+import yaml
+from datetime import datetime

-from django.conf import settings
-from django.db import connections
-from schedule import Scheduler
-from django_guid import set_guid
-from django_guid.utils import generate_guid
-
-from awx.main.dispatch.worker import TaskWorker
-from awx.main.utils.db import set_connection_name

 logger = logging.getLogger('awx.main.dispatch.periodic')


-class Scheduler(Scheduler):
-    def run_continuously(self):
-        idle_seconds = max(1, min(self.jobs).period.total_seconds() / 2)
+class ScheduledTask:
+    """
+    Class representing schedules, very loosely modeled after python schedule library Job
+    the idea of this class is to:
+     - only deal in relative times (time since the scheduler global start)
+     - only deal in integer math for target runtimes, but float for current relative time

-        def run():
-            ppid = os.getppid()
-            logger.warning('periodic beat started')
+    Missed schedule policy:
+    Invariant target times are maintained, meaning that if interval=10s offset=0
+    and it runs at t=7s, then it calls for next run in 3s.
+    However, if a complete interval has passed, that is counted as a missed run,
+    and missed runs are abandoned (no catch-up runs).
+    """

-            set_connection_name('periodic')  # set application_name to distinguish from other dispatcher processes
+    def __init__(self, name: str, data: dict):
+        # parameters needed for schedule computation
+        self.interval = int(data['schedule'].total_seconds())
+        self.offset = 0  # offset relative to start time this schedule begins
+        self.index = 0  # number of periods of the schedule that has passed

-            while True:
-                if os.getppid() != ppid:
-                    # if the parent PID changes, this process has been orphaned
-                    # via e.g., segfault or sigkill, we should exit too
-                    pid = os.getpid()
-                    logger.warning(f'periodic beat exiting gracefully pid:{pid}')
-                    raise SystemExit()
-                try:
-                    for conn in connections.all():
-                        # If the database connection has a hiccup, re-establish a new
-                        # connection
-                        conn.close_if_unusable_or_obsolete()
-                    set_guid(generate_guid())
-                    self.run_pending()
-                except Exception:
-                    logger.exception('encountered an error while scheduling periodic tasks')
-                time.sleep(idle_seconds)
+        # parameters that do not affect scheduling logic
+        self.last_run = None  # time of last run, only used for debug
+        self.completed_runs = 0  # number of times schedule is known to run
+        self.name = name
+        self.data = data  # used by caller to know what to run

-        process = Process(target=run)
-        process.daemon = True
-        process.start()
+    @property
+    def next_run(self):
+        "Time until the next run with t=0 being the global_start of the scheduler class"
+        return (self.index + 1) * self.interval + self.offset
+
+    def due_to_run(self, relative_time):
+        return bool(self.next_run <= relative_time)
+
+    def expected_runs(self, relative_time):
+        return int((relative_time - self.offset) / self.interval)
+
+    def mark_run(self, relative_time):
+        self.last_run = relative_time
+        self.completed_runs += 1
+        new_index = self.expected_runs(relative_time)
+        if new_index > self.index + 1:
+            logger.warning(f'Missed {new_index - self.index - 1} schedules of {self.name}')
+        self.index = new_index
+
+    def missed_runs(self, relative_time):
+        "Number of times job was supposed to run but failed to, only used for debug"
+        missed_ct = self.expected_runs(relative_time) - self.completed_runs
+        # if this is currently due to run do not count that as a missed run
+        if missed_ct and self.due_to_run(relative_time):
+            missed_ct -= 1
+        return missed_ct


-def run_continuously():
-    scheduler = Scheduler()
-    for task in settings.CELERYBEAT_SCHEDULE.values():
-        apply_async = TaskWorker.resolve_callable(task['task']).apply_async
-        total_seconds = task['schedule'].total_seconds()
-        scheduler.every(total_seconds).seconds.do(apply_async)
-    scheduler.run_continuously()
+class Scheduler:
+    def __init__(self, schedule):
+        """
+        Expects schedule in the form of a dictionary like
+        {
+            'job1': {'schedule': timedelta(seconds=50), 'other': 'stuff'}
+        }
+        Only the whole seconds of each schedule timedelta (truncated) are used for scheduling,
+        the rest of the data is for use by the caller to know what to run.
+        """
+        self.jobs = [ScheduledTask(name, data) for name, data in schedule.items()]
+        min_interval = min(job.interval for job in self.jobs)  # min() raises ValueError when schedule is empty
+        num_jobs = len(self.jobs)
+
+        # this is intentionally opinionated against spammy schedules
+        # a core goal is to spread out the scheduled tasks (for worker management)
+        # and high-frequency schedules just do not work with that
+        if num_jobs > min_interval:
+            raise RuntimeError(f'Number of schedules ({num_jobs}) is more than the shortest schedule interval ({min_interval} seconds).')
+
+        # evenly space out jobs over the base (shortest) interval, in whole seconds
+        for i, job in enumerate(self.jobs):
+            job.offset = (i * min_interval) // num_jobs
+
+        # internally times are all referenced relative to startup time, add grace period
+        self.global_start = time.time() + 2.0
+
+    def get_and_mark_pending(self):
+        relative_time = time.time() - self.global_start
+        to_run = []
+        for job in self.jobs:
+            if job.due_to_run(relative_time):
+                to_run.append(job)
+                logger.debug(f'scheduler found {job.name} to run, {relative_time - job.next_run} seconds after target')
+                job.mark_run(relative_time)
+        return to_run
+
+    def time_until_next_run(self):
+        relative_time = time.time() - self.global_start
+        next_job = min(self.jobs, key=lambda j: j.next_run)
+        delta = next_job.next_run - relative_time
+        if delta <= 0.1:  # NOTE(review): also taken for 0 < delta <= 0.1, where the warning prints a negative value
+            # careful not to give 0 or negative values to the select timeout, which has unclear interpretation
+            logger.warning(f'Scheduler next run of {next_job.name} is {-delta} seconds in the past')
+            return 0.1
+        elif delta > 20.0:
+            logger.warning(f'Scheduler next run unexpectedly over 20 seconds in future: {delta}')
+            return 20.0  # upper bound on the returned wait
+        logger.debug(f'Scheduler next run is {next_job.name} in {delta} seconds')
+        return delta
+
+    def debug(self, *args, **kwargs):
+        "Return a YAML text report of scheduler state; positional and keyword arguments are unused"
+        from datetime import timezone  # local import, module level only imports the datetime class
+
+        now = time.time()
+        relative_time = now - self.global_start
+        stamp = '%Y-%m-%d %H:%M:%S UTC'  # label says UTC, so timestamps below are converted with an explicit UTC tz
+        data = {'title': 'Scheduler status'}
+        data['started_time'] = datetime.fromtimestamp(self.global_start, tz=timezone.utc).strftime(stamp)
+        data['current_time'] = datetime.fromtimestamp(now, tz=timezone.utc).strftime(stamp)
+        data['current_time_relative'] = round(relative_time, 3)
+        data['total_schedules'] = len(self.jobs)
+        data['schedule_list'] = dict(
+            [
+                (
+                    job.name,
+                    dict(
+                        last_run_seconds_ago=round(relative_time - job.last_run, 3) if job.last_run is not None else None,
+                        next_run_in_seconds=round(job.next_run - relative_time, 3),
+                        offset_in_seconds=job.offset,
+                        completed_runs=job.completed_runs,
+                        missed_runs=job.missed_runs(relative_time),
+                    ),
+                )
+                for job in sorted(self.jobs, key=lambda job: job.interval)
+            ]
+        )
+        return yaml.safe_dump(data, default_flow_style=False, sort_keys=False)
--- a/awx/main/dispatch/pool.py
+++ b/awx/main/dispatch/pool.py
@@ -417,16 +417,16 @@ class AutoscalePool(WorkerPool):
                # the task manager to never do more work
                current_task = w.current_task
                if current_task and isinstance(current_task, dict):
-                    endings = ['tasks.task_manager', 'tasks.dependency_manager', 'tasks.workflow_manager']
+                    endings = ('tasks.task_manager', 'tasks.dependency_manager', 'tasks.workflow_manager')
                    current_task_name = current_task.get('task', '')
-                    if any(current_task_name.endswith(e) for e in endings):
+                    if current_task_name.endswith(endings):
                        if 'started' not in current_task:
                            w.managed_tasks[current_task['uuid']]['started'] = time.time()
                        age = time.time() - current_task['started']
                        w.managed_tasks[current_task['uuid']]['age'] = age
                        if age > self.task_manager_timeout:
-                            logger.error(f'{current_task_name} has held the advisory lock for {age}, sending SIGTERM to {w.pid}')
-                            os.kill(w.pid, signal.SIGTERM)
+                            logger.error(f'{current_task_name} has held the advisory lock for {age}, sending SIGUSR1 to {w.pid}')
+                            os.kill(w.pid, signal.SIGUSR1)

        for m in orphaned:
            # if all the workers are dead, spawn at least one
--- a/awx/main/dispatch/publish.py
+++ b/awx/main/dispatch/publish.py
@@ -73,15 +73,15 @@ class task:
                return cls.apply_async(args, kwargs)

            @classmethod
-            def apply_async(cls, args=None, kwargs=None, queue=None, uuid=None, **kw):
+            def get_async_body(cls, args=None, kwargs=None, uuid=None, **kw):
+                """
+                Get the python dict to become JSON data in the pg_notify message
+                This same message gets passed over the dispatcher IPC queue to workers
+                If a task is submitted to a multiprocessing pool, skipping pg_notify, this might be used directly
+                """
                task_id = uuid or str(uuid4())
                args = args or []
                kwargs = kwargs or {}
-                queue = queue or getattr(cls.queue, 'im_func', cls.queue)
-                if not queue:
-                    msg = f'{cls.name}: Queue value required and may not be None'
-                    logger.error(msg)
-                    raise ValueError(msg)
                obj = {'uuid': task_id, 'args': args, 'kwargs': kwargs, 'task': cls.name, 'time_pub': time.time()}
                guid = get_guid()
                if guid:
@@ -89,6 +89,16 @@ class task:
                if bind_kwargs:
                    obj['bind_kwargs'] = bind_kwargs
                obj.update(**kw)
+                return obj
+
+            @classmethod
+            def apply_async(cls, args=None, kwargs=None, queue=None, uuid=None, **kw):
+                queue = queue or getattr(cls.queue, 'im_func', cls.queue)
+                if not queue:
+                    msg = f'{cls.name}: Queue value required and may not be None'
+                    logger.error(msg)
+                    raise ValueError(msg)
+                obj = cls.get_async_body(args=args, kwargs=kwargs, uuid=uuid, **kw)
                if callable(queue):
                    queue = queue()
                if not is_testing():
@@ -116,4 +126,5 @@ class task:
        setattr(fn, 'name', cls.name)
        setattr(fn, 'apply_async', cls.apply_async)
        setattr(fn, 'delay', cls.delay)
+        setattr(fn, 'get_async_body', cls.get_async_body)
        return fn
--- a/awx/main/dispatch/worker/base.py
+++ b/awx/main/dispatch/worker/base.py
@@ -11,11 +11,13 @@ import psycopg
 import time
 from uuid import UUID
 from queue import Empty as QueueEmpty
+from datetime import timedelta

 from django import db
 from django.conf import settings

 from awx.main.dispatch.pool import WorkerPool
+from awx.main.dispatch.periodic import Scheduler
 from awx.main.dispatch import pg_bus_conn
 from awx.main.utils.common import log_excess_runtime
 from awx.main.utils.db import set_connection_name
@@ -64,10 +66,12 @@ class AWXConsumerBase(object):
    def control(self, body):
        logger.warning(f'Received control signal:\n{body}')
        control = body.get('control')
-        if control in ('status', 'running', 'cancel'):
+        if control in ('status', 'schedule', 'running', 'cancel'):
            reply_queue = body['reply_to']
            if control == 'status':
                msg = '\n'.join([self.listening_on, self.pool.debug()])
+            elif control == 'schedule':  # part of the same mutually exclusive chain as 'status' above
+                msg = self.scheduler.debug()
            elif control == 'running':
                msg = []
                for worker in self.pool.workers:
@@ -85,24 +89,20 @@ class AWXConsumerBase(object):
                if task_ids and not msg:
                    logger.info(f'Could not locate running tasks to cancel with ids={task_ids}')

-            with pg_bus_conn() as conn:
-                conn.notify(reply_queue, json.dumps(msg))
+            if reply_queue is not None:
+                with pg_bus_conn() as conn:
+                    conn.notify(reply_queue, json.dumps(msg))
        elif control == 'reload':
            for worker in self.pool.workers:
                worker.quit()
        else:
            logger.error('unrecognized control message: {}'.format(control))

-    def process_task(self, body):
+    def dispatch_task(self, body):
+        """This will place the given body into a worker queue to run method decorated as a task"""
        if isinstance(body, dict):
            body['time_ack'] = time.time()

-        if 'control' in body:
-            try:
-                return self.control(body)
-            except Exception:
-                logger.exception(f"Exception handling control message: {body}")
-                return
        if len(self.pool):
            if "uuid" in body and body['uuid']:
                try:
@@ -116,15 +116,24 @@ class AWXConsumerBase(object):
        self.pool.write(queue, body)
        self.total_messages += 1

+    def process_task(self, body):
+        """Routes the task details in body as either a control task or a task-task"""
+        if 'control' in body:
+            try:
+                return self.control(body)
+            except Exception:
+                logger.exception(f"Exception handling control message: {body}")
+                return
+        self.dispatch_task(body)
+
    @log_excess_runtime(logger)
    def record_statistics(self):
        if time.time() - self.last_stats > 1:  # buffer stat recording to once per second
            try:
                self.redis.set(f'awx_{self.name}_statistics', self.pool.debug())
-                self.last_stats = time.time()
            except Exception:
                logger.exception(f"encountered an error communicating with redis to store {self.name} statistics")
-                self.last_stats = time.time()
+            self.last_stats = time.time()

    def run(self, *args, **kwargs):
        signal.signal(signal.SIGINT, self.stop)
@@ -151,9 +160,9 @@ class AWXConsumerRedis(AWXConsumerBase):


 class AWXConsumerPG(AWXConsumerBase):
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, schedule=None, **kwargs):
        super().__init__(*args, **kwargs)
-        self.pg_max_wait = settings.DISPATCHER_DB_DOWNTOWN_TOLLERANCE
+        self.pg_max_wait = settings.DISPATCHER_DB_DOWNTIME_TOLERANCE
        # if no successful loops have ran since startup, then we should fail right away
        self.pg_is_down = True  # set so that we fail if we get database errors on startup
        init_time = time.time()
@@ -162,24 +171,53 @@ class AWXConsumerPG(AWXConsumerBase):
        self.subsystem_metrics = s_metrics.Metrics(auto_pipe_execute=False)
        self.last_metrics_gather = init_time
        self.listen_cumulative_time = 0.0
+        if schedule:
+            schedule = schedule.copy()
+        else:
+            schedule = {}
+        # add control tasks to be ran at regular schedules
+        # NOTE: if we run out of database connections, it is important to still run cleanup
+        # so that we scale down workers and free up connections
+        schedule['pool_cleanup'] = {'control': self.pool.cleanup, 'schedule': timedelta(seconds=60)}
+        # record subsystem metrics for the dispatcher
+        schedule['metrics_gather'] = {'control': self.record_metrics, 'schedule': timedelta(seconds=20)}
+        self.scheduler = Scheduler(schedule)
+
+    def record_metrics(self):
+        current_time = time.time()  # single timestamp used for both the ratio below and the new window start
+        self.pool.produce_subsystem_metrics(self.subsystem_metrics)
+        self.subsystem_metrics.set('dispatcher_availability', self.listen_cumulative_time / (current_time - self.last_metrics_gather))
+        self.subsystem_metrics.pipe_execute()
+        self.listen_cumulative_time = 0.0  # start a fresh accumulation window for the next gather
+        self.last_metrics_gather = current_time

    def run_periodic_tasks(self):
-        self.record_statistics()  # maintains time buffer in method
+        """
+        Run general periodic logic, and return maximum time in seconds before
+        the next requested run
+        This may be called more often than that when events are consumed
+        so this should be very efficient in that
+        """
+        try:
+            self.record_statistics()  # maintains time buffer in method
+        except Exception as exc:
+            logger.warning(f'Failed to save dispatcher statistics {exc}')

-        current_time = time.time()
-        if current_time - self.last_cleanup > 60:  # same as cluster_node_heartbeat
-            # NOTE: if we run out of database connections, it is important to still run cleanup
-            # so that we scale down workers and free up connections
-            self.pool.cleanup()
-            self.last_cleanup = current_time
+        for job in self.scheduler.get_and_mark_pending():
+            if 'control' in job.data:
+                try:
+                    job.data['control']()
+                except Exception:
+                    logger.exception(f'Error running control task {job.data}')
+            elif 'task' in job.data:
+                body = self.worker.resolve_callable(job.data['task']).get_async_body()
+                # bypasses pg_notify for scheduled tasks
+                self.dispatch_task(body)

-        # record subsystem metrics for the dispatcher
-        if current_time - self.last_metrics_gather > 20:
-            self.pool.produce_subsystem_metrics(self.subsystem_metrics)
-            self.subsystem_metrics.set('dispatcher_availability', self.listen_cumulative_time / (current_time - self.last_metrics_gather))
-            self.subsystem_metrics.pipe_execute()
-            self.listen_cumulative_time = 0.0
-            self.last_metrics_gather = current_time
+        self.pg_is_down = False
+        self.listen_start = time.time()
+
+        return self.scheduler.time_until_next_run()

    def run(self, *args, **kwargs):
        super(AWXConsumerPG, self).run(*args, **kwargs)
@@ -195,14 +233,15 @@ class AWXConsumerPG(AWXConsumerBase):
                    if init is False:
                        self.worker.on_start()
                        init = True
-                    self.listen_start = time.time()
+                    # run_periodic_tasks runs scheduled actions and returns the time until the next scheduled action
+                    # this is saved to the conn (PubSub) object in order to modify read timeout in-loop
+                    conn.select_timeout = self.run_periodic_tasks()
+                    # this is the main operational loop for awx-manage run_dispatcher
                    for e in conn.events(yield_timeouts=True):
-                        self.listen_cumulative_time += time.time() - self.listen_start
+                        self.listen_cumulative_time += time.time() - self.listen_start  # for metrics
                        if e is not None:
                            self.process_task(json.loads(e.payload))
-                        self.run_periodic_tasks()
-                        self.pg_is_down = False
-                        self.listen_start = time.time()
+                        conn.select_timeout = self.run_periodic_tasks()
                    if self.should_stop:
                        return
            except psycopg.InterfaceError:
@@ -250,8 +289,8 @@ class BaseWorker(object):
                    break
            except QueueEmpty:
                continue
-            except Exception as e:
-                logger.error("Exception on worker {}, restarting: ".format(idx) + str(e))
+            except Exception:
+                logger.exception("Exception on worker {}, reconnecting: ".format(idx))
                continue
            try:
                for conn in db.connections.all():
--- a/awx/main/management/commands/cleanup_activitystream.py
+++ b/awx/main/management/commands/cleanup_activitystream.py
@@ -24,6 +24,9 @@ class Command(BaseCommand):
    def add_arguments(self, parser):
        parser.add_argument('--days', dest='days', type=int, default=90, metavar='N', help='Remove activity stream events more than N days old')
        parser.add_argument('--dry-run', dest='dry_run', action='store_true', default=False, help='Dry run mode (show items that would be removed)')
+        parser.add_argument(
+            '--batch-size', dest='batch_size', type=int, default=500, metavar='X', help='Remove activity stream events in batch of X events. Defaults to 500.'
+        )

    def init_logging(self):
        log_levels = dict(enumerate([logging.ERROR, logging.INFO, logging.DEBUG, 0]))
@@ -48,7 +51,7 @@ class Command(BaseCommand):
                else:
                    pks_to_delete.add(asobj.pk)
            # Cleanup objects in batches instead of deleting each one individually.
-            if len(pks_to_delete) >= 500:
+            if len(pks_to_delete) >= self.batch_size:
                ActivityStream.objects.filter(pk__in=pks_to_delete).delete()
                n_deleted_items += len(pks_to_delete)
                pks_to_delete.clear()
@@ -63,4 +66,5 @@ class Command(BaseCommand):
        self.days = int(options.get('days', 30))
        self.cutoff = now() - datetime.timedelta(days=self.days)
        self.dry_run = bool(options.get('dry_run', False))
+        self.batch_size = int(options.get('batch_size', 500))
        self.cleanup_activitystream()
--- a/awx/main/management/commands/cleanup_host_metrics.py
+++ b/awx/main/management/commands/cleanup_host_metrics.py
@@ -1,22 +1,22 @@
-from awx.main.models import HostMetric
 from django.core.management.base import BaseCommand
 from django.conf import settings
+from awx.main.tasks.host_metrics import HostMetricTask


 class Command(BaseCommand):
    """
-    Run soft-deleting of HostMetrics
+    This command provides a cleanup task for the HostMetric model.
+    There are two modes, which run in the following order:
+    - soft cleanup
+    - - Perform soft-deletion of all host metrics last automated 12 months ago or before.
+        This is the same as issuing a DELETE request to /api/v2/host_metrics/N/ for all host metrics that match the criteria.
+    - - updates columns delete, deleted_counter and last_deleted
+    - hard cleanup
+    - - Permanently erase from the database all host metrics last automated 36 months ago or before.
+        This operation happens after the soft deletion has finished.
    """

-    help = 'Run soft-deleting of HostMetrics'
-
-    def add_arguments(self, parser):
-        parser.add_argument('--months-ago', type=int, dest='months-ago', action='store', help='Threshold in months for soft-deleting')
+    help = 'Run soft and hard-deletion of HostMetrics'

    def handle(self, *args, **options):
-        months_ago = options.get('months-ago') or None
-
-        if not months_ago:
-            months_ago = getattr(settings, 'CLEANUP_HOST_METRICS_THRESHOLD', 12)
-
-        HostMetric.cleanup_task(months_ago)
+        HostMetricTask().cleanup(soft_threshold=settings.CLEANUP_HOST_METRICS_SOFT_THRESHOLD, hard_threshold=settings.CLEANUP_HOST_METRICS_HARD_THRESHOLD)
--- a/awx/main/management/commands/cleanup_jobs.py
+++ b/awx/main/management/commands/cleanup_jobs.py
@@ -9,6 +9,7 @@ import re


 # Django
+from django.apps import apps
 from django.core.management.base import BaseCommand, CommandError
 from django.db import transaction, connection
 from django.db.models import Min, Max
@@ -150,6 +151,9 @@ class Command(BaseCommand):
    def add_arguments(self, parser):
        parser.add_argument('--days', dest='days', type=int, default=90, metavar='N', help='Remove jobs/updates executed more than N days ago. Defaults to 90.')
        parser.add_argument('--dry-run', dest='dry_run', action='store_true', default=False, help='Dry run mode (show items that would be removed)')
+        parser.add_argument(
+            '--batch-size', dest='batch_size', type=int, default=100000, metavar='X', help='Remove jobs in batch of X jobs. Defaults to 100000.'
+        )
        parser.add_argument('--jobs', dest='only_jobs', action='store_true', default=False, help='Remove jobs')
        parser.add_argument('--ad-hoc-commands', dest='only_ad_hoc_commands', action='store_true', default=False, help='Remove ad hoc commands')
        parser.add_argument('--project-updates', dest='only_project_updates', action='store_true', default=False, help='Remove project updates')
@@ -195,18 +199,58 @@ class Command(BaseCommand):
        delete_meta.delete_jobs()
        return (delete_meta.jobs_no_delete_count, delete_meta.jobs_to_delete_count)

-    def _cascade_delete_job_events(self, model, pk_list):
+    def has_unpartitioned_table(self, model):
+        "Return True while the _unpartitioned_ event table for model is still listed in pg_tables"
+        tblname = unified_job_class_to_event_table_name(model)
+        with connection.cursor() as cursor:
+            # Bind the table name as a query parameter instead of interpolating it
+            # into the SQL text, so the value is always treated as a literal.
+            cursor.execute("SELECT 1 FROM pg_tables WHERE tablename = %s;", [f'_unpartitioned_{tblname}'])
+            return cursor.fetchone() is not None
+
+    def _delete_unpartitioned_table(self, model):
+        "Drop the _unpartitioned_ event table for model when it is empty or its newest event is older than self.cutoff"
+        tblname = unified_job_class_to_event_table_name(model)
+        if not self.has_unpartitioned_table(model):
+            self.logger.debug(f'Table _unpartitioned_{tblname} does not exist, you are fully migrated.')
+            return
+
+        with connection.cursor() as cursor:
+            # same as UnpartitionedJobEvent.objects.aggregate(Max('created'))
+            cursor.execute(f'SELECT MAX("_unpartitioned_{tblname}"."created") FROM "_unpartitioned_{tblname}";')
+            row = cursor.fetchone()
+            last_created = row[0]  # expected to be None when the table has no events
+
+        if last_created:
+            self.logger.info(f'Last event created in _unpartitioned_{tblname} was {last_created.isoformat()}')
+        else:
+            self.logger.info(f'Table _unpartitioned_{tblname} has no events in it')
+
+        if (last_created is None) or (last_created < self.cutoff):  # nothing in the table is newer than the cutoff
+            self.logger.warning(
+                f'Dropping table _unpartitioned_{tblname} since no records are newer than {self.cutoff}\n'
+                'WARNING - this will happen in a separate transaction so a failure will not roll back prior cleanup'
+            )
+            with connection.cursor() as cursor:
+                cursor.execute(f'DROP TABLE _unpartitioned_{tblname};')
+
+    def _delete_unpartitioned_events(self, model, pk_list):
+        "While the unpartitioned event table for model still exists, delete its events whose parent job pk is in pk_list"
+        tblname = unified_job_class_to_event_table_name(model)
+        rel_name = model().event_parent_key
+
+        # Bail if the unpartitioned table does not exist anymore
+        if not self.has_unpartitioned_table(model):
+            return
+
+        # Table still exists, delete individual unpartitioned events
        if pk_list:
            with connection.cursor() as cursor:
-                tblname = unified_job_class_to_event_table_name(model)
-
+                self.logger.debug(f'Deleting events for {len(pk_list)} jobs from _unpartitioned_{tblname}, use a longer cleanup window to delete the table.')
                pk_list_csv = ','.join(map(str, pk_list))
-                rel_name = model().event_parent_key
-                cursor.execute(f"DELETE FROM _unpartitioned_{tblname} WHERE {rel_name} IN ({pk_list_csv})")
+                cursor.execute(f"DELETE FROM _unpartitioned_{tblname} WHERE {rel_name} IN ({pk_list_csv});")

    def cleanup_jobs(self):
-        batch_size = 100000
-
        # Hack to avoid doing N+1 queries as each item in the Job query set does
        # an individual query to get the underlying UnifiedJob.
        Job.polymorphic_super_sub_accessors_replaced = True
@@ -221,13 +265,14 @@ class Command(BaseCommand):
        deleted = 0
        info = qs.aggregate(min=Min('id'), max=Max('id'))
        if info['min'] is not None:
-            for start in range(info['min'], info['max'] + 1, batch_size):
-                qs_batch = qs.filter(id__gte=start, id__lte=start + batch_size)
+            for start in range(info['min'], info['max'] + 1, self.batch_size):
+                qs_batch = qs.filter(id__gte=start, id__lte=start + self.batch_size)
                pk_list = qs_batch.values_list('id', flat=True)

                _, results = qs_batch.delete()
                deleted += results['main.Job']
-                self._cascade_delete_job_events(Job, pk_list)
+                # Avoid dropping the job event table in case we have interacted with it already
+                self._delete_unpartitioned_events(Job, pk_list)

        return skipped, deleted

@@ -250,7 +295,7 @@ class Command(BaseCommand):
                deleted += 1

        if not self.dry_run:
-            self._cascade_delete_job_events(AdHocCommand, pk_list)
+            self._delete_unpartitioned_events(AdHocCommand, pk_list)

        skipped += AdHocCommand.objects.filter(created__gte=self.cutoff).count()
        return skipped, deleted
@@ -278,7 +323,7 @@ class Command(BaseCommand):
                deleted += 1

        if not self.dry_run:
-            self._cascade_delete_job_events(ProjectUpdate, pk_list)
+            self._delete_unpartitioned_events(ProjectUpdate, pk_list)

        skipped += ProjectUpdate.objects.filter(created__gte=self.cutoff).count()
        return skipped, deleted
@@ -306,7 +351,7 @@ class Command(BaseCommand):
                deleted += 1

        if not self.dry_run:
-            self._cascade_delete_job_events(InventoryUpdate, pk_list)
+            self._delete_unpartitioned_events(InventoryUpdate, pk_list)

        skipped += InventoryUpdate.objects.filter(created__gte=self.cutoff).count()
        return skipped, deleted
@@ -330,7 +375,7 @@ class Command(BaseCommand):
                deleted += 1

        if not self.dry_run:
-            self._cascade_delete_job_events(SystemJob, pk_list)
+            self._delete_unpartitioned_events(SystemJob, pk_list)

        skipped += SystemJob.objects.filter(created__gte=self.cutoff).count()
        return skipped, deleted
@@ -375,12 +420,12 @@ class Command(BaseCommand):
        skipped += Notification.objects.filter(created__gte=self.cutoff).count()
        return skipped, deleted

-    @transaction.atomic
    def handle(self, *args, **options):
        self.verbosity = int(options.get('verbosity', 1))
        self.init_logging()
        self.days = int(options.get('days', 90))
        self.dry_run = bool(options.get('dry_run', False))
+        self.batch_size = int(options.get('batch_size', 100000))
        try:
            self.cutoff = now() - datetime.timedelta(days=self.days)
        except OverflowError:
@@ -402,19 +447,29 @@ class Command(BaseCommand):
                del s.receivers[:]
                s.sender_receivers_cache.clear()

-        for m in model_names:
-            if m not in models_to_cleanup:
-                continue
+        with transaction.atomic():
+            for m in models_to_cleanup:
+                skipped, deleted = getattr(self, 'cleanup_%s' % m)()

-            skipped, deleted = getattr(self, 'cleanup_%s' % m)()
+                func = getattr(self, 'cleanup_%s_partition' % m, None)
+                if func:
+                    skipped_partition, deleted_partition = func()
+                    skipped += skipped_partition
+                    deleted += deleted_partition

-            func = getattr(self, 'cleanup_%s_partition' % m, None)
-            if func:
-                skipped_partition, deleted_partition = func()
-                skipped += skipped_partition
-                deleted += deleted_partition
+                if self.dry_run:
+                    self.logger.log(99, '%s: %d would be deleted, %d would be skipped.', m.replace('_', ' '), deleted, skipped)
+                else:
+                    self.logger.log(99, '%s: %d deleted, %d skipped.', m.replace('_', ' '), deleted, skipped)

-            if self.dry_run:
-                self.logger.log(99, '%s: %d would be deleted, %d would be skipped.', m.replace('_', ' '), deleted, skipped)
-            else:
-                self.logger.log(99, '%s: %d deleted, %d skipped.', m.replace('_', ' '), deleted, skipped)
+        # Deleting unpartitioned tables cannot be done in the same transaction as updates to related tables
+        if not self.dry_run:
+            with transaction.atomic():
+                for m in models_to_cleanup:
+                    unified_job_class_name = m[:-1].title().replace('Management', 'System').replace('_', '')
+                    unified_job_class = apps.get_model('main', unified_job_class_name)
+                    try:
+                        unified_job_class().event_class
+                    except (NotImplementedError, AttributeError):
+                        continue  # no need to run this for models without events
+                    self._delete_unpartitioned_table(unified_job_class)
--- a/awx/main/management/commands/host_metric_summary_monthly.py
+++ b/awx/main/management/commands/host_metric_summary_monthly.py
@@ -0,0 +1,9 @@
+from django.core.management.base import BaseCommand
+from awx.main.tasks.host_metrics import HostMetricSummaryMonthlyTask
+
+
+class Command(BaseCommand):
+    help = 'Computing of HostMetricSummaryMonthly'
+
+    def handle(self, *args, **options):
+        HostMetricSummaryMonthlyTask().execute()  # args and options are not read; all work is delegated to the task
--- a/awx/main/management/commands/provision_instance.py
+++ b/awx/main/management/commands/provision_instance.py
@@ -25,17 +25,20 @@ class Command(BaseCommand):

    def add_arguments(self, parser):
        parser.add_argument('--hostname', dest='hostname', type=str, help="Hostname used during provisioning")
+        parser.add_argument('--listener_port', dest='listener_port', type=int, help="Receptor listener port")
        parser.add_argument('--node_type', type=str, default='hybrid', choices=['control', 'execution', 'hop', 'hybrid'], help="Instance Node type")
        parser.add_argument('--uuid', type=str, help="Instance UUID")

-    def _register_hostname(self, hostname, node_type, uuid):
+    def _register_hostname(self, hostname, node_type, uuid, listener_port):
        if not hostname:
            if not settings.AWX_AUTO_DEPROVISION_INSTANCES:
                raise CommandError('Registering with values from settings only intended for use in K8s installs')

            from awx.main.management.commands.register_queue import RegisterQueue

-            (changed, instance) = Instance.objects.register(ip_address=os.environ.get('MY_POD_IP'), node_type='control', node_uuid=settings.SYSTEM_UUID)
+            (changed, instance) = Instance.objects.register(
+                ip_address=os.environ.get('MY_POD_IP'), listener_port=listener_port, node_type='control', node_uuid=settings.SYSTEM_UUID
+            )
            RegisterQueue(settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME, 100, 0, [], is_container_group=False).register()
            RegisterQueue(
                settings.DEFAULT_EXECUTION_QUEUE_NAME,
@@ -48,7 +51,7 @@ class Command(BaseCommand):
                max_concurrent_jobs=settings.DEFAULT_EXECUTION_QUEUE_MAX_CONCURRENT_JOBS,
            ).register()
        else:
-            (changed, instance) = Instance.objects.register(hostname=hostname, node_type=node_type, node_uuid=uuid)
+            (changed, instance) = Instance.objects.register(hostname=hostname, node_type=node_type, node_uuid=uuid, listener_port=listener_port)
        if changed:
            print("Successfully registered instance {}".format(hostname))
        else:
@@ -58,6 +61,6 @@ class Command(BaseCommand):
    @transaction.atomic
    def handle(self, **options):
        self.changed = False
-        self._register_hostname(options.get('hostname'), options.get('node_type'), options.get('uuid'))
+        self._register_hostname(options.get('hostname'), options.get('node_type'), options.get('uuid'), options.get('listener_port'))
        if self.changed:
            print("(changed: True)")
--- a/awx/main/management/commands/run_dispatcher.py
+++ b/awx/main/management/commands/run_dispatcher.py
@@ -3,15 +3,13 @@
 import logging
 import yaml

-from django.core.cache import cache as django_cache
+from django.conf import settings
 from django.core.management.base import BaseCommand
-from django.db import connection as django_connection

 from awx.main.dispatch import get_task_queuename
 from awx.main.dispatch.control import Control
 from awx.main.dispatch.pool import AutoscalePool
 from awx.main.dispatch.worker import AWXConsumerPG, TaskWorker
-from awx.main.dispatch import periodic

 logger = logging.getLogger('awx.main.dispatch')

@@ -21,6 +19,7 @@ class Command(BaseCommand):

    def add_arguments(self, parser):
        parser.add_argument('--status', dest='status', action='store_true', help='print the internal state of any running dispatchers')
+        parser.add_argument('--schedule', dest='schedule', action='store_true', help='print the current status of schedules being ran by dispatcher')
        parser.add_argument('--running', dest='running', action='store_true', help='print the UUIDs of any tasked managed by this dispatcher')
        parser.add_argument(
            '--reload',
@@ -42,6 +41,9 @@ class Command(BaseCommand):
        if options.get('status'):
            print(Control('dispatcher').status())
            return
+        if options.get('schedule'):
+            print(Control('dispatcher').schedule())
+            return
        if options.get('running'):
            print(Control('dispatcher').running())
            return
@@ -58,21 +60,11 @@ class Command(BaseCommand):
            print(Control('dispatcher').cancel(cancel_data))
            return

-        # It's important to close these because we're _about_ to fork, and we
-        # don't want the forked processes to inherit the open sockets
-        # for the DB and cache connections (that way lies race conditions)
-        django_connection.close()
-        django_cache.close()
-
-        # spawn a daemon thread to periodically enqueues scheduled tasks
-        # (like the node heartbeat)
-        periodic.run_continuously()
-
        consumer = None

        try:
            queues = ['tower_broadcast_all', 'tower_settings_change', get_task_queuename()]
-            consumer = AWXConsumerPG('dispatcher', TaskWorker(), queues, AutoscalePool(min_workers=4))
+            consumer = AWXConsumerPG('dispatcher', TaskWorker(), queues, AutoscalePool(min_workers=4), schedule=settings.CELERYBEAT_SCHEDULE)
            consumer.run()
        except KeyboardInterrupt:
            logger.debug('Terminating Task Dispatcher')
--- a/awx/main/managers.py
+++ b/awx/main/managers.py
@@ -115,21 +115,25 @@ class InstanceManager(models.Manager):
            return node[0]
        raise RuntimeError("No instance found with the current cluster host id")

-    def register(self, node_uuid=None, hostname=None, ip_address=None, node_type='hybrid', defaults=None):
+    def register(self, node_uuid=None, hostname=None, ip_address="", listener_port=None, node_type='hybrid', defaults=None):
        if not hostname:
            hostname = settings.CLUSTER_HOST_ID

+        if not ip_address:
+            ip_address = ""
+
        with advisory_lock('instance_registration_%s' % hostname):
            if settings.AWX_AUTO_DEPROVISION_INSTANCES:
                # detect any instances with the same IP address.
-                # if one exists, set it to None
-                inst_conflicting_ip = self.filter(ip_address=ip_address).exclude(hostname=hostname)
-                if inst_conflicting_ip.exists():
-                    for other_inst in inst_conflicting_ip:
-                        other_hostname = other_inst.hostname
-                        other_inst.ip_address = None
-                        other_inst.save(update_fields=['ip_address'])
-                        logger.warning("IP address {0} conflict detected, ip address unset for host {1}.".format(ip_address, other_hostname))
+                # if one exists, set it to ""
+                if ip_address:
+                    inst_conflicting_ip = self.filter(ip_address=ip_address).exclude(hostname=hostname)
+                    if inst_conflicting_ip.exists():
+                        for other_inst in inst_conflicting_ip:
+                            other_hostname = other_inst.hostname
+                            other_inst.ip_address = ""
+                            other_inst.save(update_fields=['ip_address'])
+                            logger.warning("IP address {0} conflict detected, ip address unset for host {1}.".format(ip_address, other_hostname))

            # Return existing instance that matches hostname or UUID (default to UUID)
            if node_uuid is not None and node_uuid != UUID_DEFAULT and self.filter(uuid=node_uuid).exists():
@@ -157,6 +161,9 @@ class InstanceManager(models.Manager):
                if instance.node_type != node_type:
                    instance.node_type = node_type
                    update_fields.append('node_type')
+                if instance.listener_port != listener_port:
+                    instance.listener_port = listener_port
+                    update_fields.append('listener_port')
                if update_fields:
                    instance.save(update_fields=update_fields)
                    return (True, instance)
@@ -167,12 +174,11 @@ class InstanceManager(models.Manager):
            create_defaults = {
                'node_state': Instance.States.INSTALLED,
                'capacity': 0,
-                'listener_port': 27199,
            }
            if defaults is not None:
                create_defaults.update(defaults)
            uuid_option = {'uuid': node_uuid if node_uuid is not None else uuid.uuid4()}
            if node_type == 'execution' and 'version' not in create_defaults:
                create_defaults['version'] = RECEPTOR_PENDING
-            instance = self.create(hostname=hostname, ip_address=ip_address, node_type=node_type, **create_defaults, **uuid_option)
+            instance = self.create(hostname=hostname, ip_address=ip_address, listener_port=listener_port, node_type=node_type, **create_defaults, **uuid_option)
        return (True, instance)
--- a/awx/main/migrations/0001_initial.py
+++ b/awx/main/migrations/0001_initial.py
@@ -9,13 +9,11 @@ from django.db import migrations, models
 import django.utils.timezone
 import django.db.models.deletion
 from django.conf import settings
-import taggit.managers
 import awx.main.fields


 class Migration(migrations.Migration):
    dependencies = [
-        ('taggit', '0002_auto_20150616_2121'),
        ('contenttypes', '0002_remove_content_type_name'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]
@@ -184,12 +182,6 @@ class Migration(migrations.Migration):
                        null=True,
                    ),
                ),
-                (
-                    'tags',
-                    taggit.managers.TaggableManager(
-                        to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-                    ),
-                ),
            ],
            options={
                'ordering': ('kind', 'name'),
@@ -529,12 +521,6 @@ class Migration(migrations.Migration):
                        null=True,
                    ),
                ),
-                (
-                    'tags',
-                    taggit.managers.TaggableManager(
-                        to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-                    ),
-                ),
                ('users', models.ManyToManyField(related_name='organizations', to=settings.AUTH_USER_MODEL, blank=True)),
            ],
            options={
@@ -589,12 +575,6 @@ class Migration(migrations.Migration):
                        null=True,
                    ),
                ),
-                (
-                    'tags',
-                    taggit.managers.TaggableManager(
-                        to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-                    ),
-                ),
            ],
        ),
        migrations.CreateModel(
@@ -644,12 +624,6 @@ class Migration(migrations.Migration):
                        null=True,
                    ),
                ),
-                (
-                    'tags',
-                    taggit.managers.TaggableManager(
-                        to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-                    ),
-                ),
            ],
            options={
                'ordering': ['-next_run'],
@@ -687,12 +661,6 @@ class Migration(migrations.Migration):
                    ),
                ),
                ('organization', models.ForeignKey(related_name='teams', on_delete=django.db.models.deletion.SET_NULL, to='main.Organization', null=True)),
-                (
-                    'tags',
-                    taggit.managers.TaggableManager(
-                        to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-                    ),
-                ),
                ('users', models.ManyToManyField(related_name='teams', to=settings.AUTH_USER_MODEL, blank=True)),
            ],
            options={
@@ -1267,13 +1235,6 @@ class Migration(migrations.Migration):
                null=True,
            ),
        ),
-        migrations.AddField(
-            model_name='unifiedjobtemplate',
-            name='tags',
-            field=taggit.managers.TaggableManager(
-                to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-            ),
-        ),
        migrations.AddField(
            model_name='unifiedjob',
            name='created_by',
@@ -1319,13 +1280,6 @@ class Migration(migrations.Migration):
            name='schedule',
            field=models.ForeignKey(on_delete=django.db.models.deletion.SET_NULL, default=None, editable=False, to='main.Schedule', null=True),
        ),
-        migrations.AddField(
-            model_name='unifiedjob',
-            name='tags',
-            field=taggit.managers.TaggableManager(
-                to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-            ),
-        ),
        migrations.AddField(
            model_name='unifiedjob',
            name='unified_job_template',
@@ -1370,13 +1324,6 @@ class Migration(migrations.Migration):
                help_text='Organization containing this inventory.',
            ),
        ),
-        migrations.AddField(
-            model_name='inventory',
-            name='tags',
-            field=taggit.managers.TaggableManager(
-                to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-            ),
-        ),
        migrations.AddField(
            model_name='host',
            name='inventory',
@@ -1407,13 +1354,6 @@ class Migration(migrations.Migration):
                null=True,
            ),
        ),
-        migrations.AddField(
-            model_name='host',
-            name='tags',
-            field=taggit.managers.TaggableManager(
-                to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-            ),
-        ),
        migrations.AddField(
            model_name='group',
            name='hosts',
@@ -1441,13 +1381,6 @@ class Migration(migrations.Migration):
            name='parents',
            field=models.ManyToManyField(related_name='children', to='main.Group', blank=True),
        ),
-        migrations.AddField(
-            model_name='group',
-            name='tags',
-            field=taggit.managers.TaggableManager(
-                to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-            ),
-        ),
        migrations.AddField(
            model_name='custominventoryscript',
            name='organization',
@@ -1459,13 +1392,6 @@ class Migration(migrations.Migration):
                null=True,
            ),
        ),
-        migrations.AddField(
-            model_name='custominventoryscript',
-            name='tags',
-            field=taggit.managers.TaggableManager(
-                to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-            ),
-        ),
        migrations.AddField(
            model_name='credential',
            name='team',
--- a/awx/main/migrations/0002_squashed_v300_release.py
+++ b/awx/main/migrations/0002_squashed_v300_release.py
@@ -12,8 +12,6 @@ import django.db.models.deletion
 from django.conf import settings
 from django.utils.timezone import now

-import taggit.managers
-

 def create_system_job_templates(apps, schema_editor):
    """
@@ -125,7 +123,6 @@ class Migration(migrations.Migration):
    ]

    dependencies = [
-        ('taggit', '0002_auto_20150616_2121'),
        ('contenttypes', '0002_remove_content_type_name'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('main', '0001_initial'),
@@ -256,12 +253,6 @@ class Migration(migrations.Migration):
                    'organization',
                    models.ForeignKey(related_name='notification_templates', on_delete=django.db.models.deletion.SET_NULL, to='main.Organization', null=True),
                ),
-                (
-                    'tags',
-                    taggit.managers.TaggableManager(
-                        to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-                    ),
-                ),
            ],
        ),
        migrations.AddField(
@@ -721,12 +712,6 @@ class Migration(migrations.Migration):
                        help_text='Organization this label belongs to.',
                    ),
                ),
-                (
-                    'tags',
-                    taggit.managers.TaggableManager(
-                        to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-                    ),
-                ),
            ],
            options={
                'ordering': ('organization', 'name'),
--- a/awx/main/migrations/0006_v320_release.py
+++ b/awx/main/migrations/0006_v320_release.py
@@ -5,7 +5,6 @@ from __future__ import unicode_literals
 # Django
 from django.db import connection, migrations, models, OperationalError, ProgrammingError
 from django.conf import settings
-import taggit.managers

 # AWX
 import awx.main.fields
@@ -317,10 +316,6 @@ class Migration(migrations.Migration):
            model_name='permission',
            name='project',
        ),
-        migrations.RemoveField(
-            model_name='permission',
-            name='tags',
-        ),
        migrations.RemoveField(
            model_name='permission',
            name='team',
@@ -510,12 +505,6 @@ class Migration(migrations.Migration):
                        null=True,
                    ),
                ),
-                (
-                    'tags',
-                    taggit.managers.TaggableManager(
-                        to='taggit.Tag', through='taggit.TaggedItem', blank=True, help_text='A comma-separated list of tags.', verbose_name='Tags'
-                    ),
-                ),
            ],
            options={
                'ordering': ('kind', 'name'),
--- a/awx/main/migrations/0067_v350_credential_plugins.py
+++ b/awx/main/migrations/0067_v350_credential_plugins.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 from django.conf import settings
 from django.db import migrations, models
 import django.db.models.deletion
-import taggit.managers

 # AWX
 import awx.main.fields
@@ -20,7 +19,6 @@ def setup_tower_managed_defaults(apps, schema_editor):
 class Migration(migrations.Migration):
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
-        ('taggit', '0002_auto_20150616_2121'),
        ('main', '0066_v350_inventorysource_custom_virtualenv'),
    ]

@@ -60,12 +58,6 @@ class Migration(migrations.Migration):
                    'source_credential',
                    models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='target_input_sources', to='main.Credential'),
                ),
-                (
-                    'tags',
-                    taggit.managers.TaggableManager(
-                        blank=True, help_text='A comma-separated list of tags.', through='taggit.TaggedItem', to='taggit.Tag', verbose_name='Tags'
-                    ),
-                ),
                (
                    'target_credential',
                    models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='input_sources', to='main.Credential'),
--- a/awx/main/migrations/0124_execution_environments.py
+++ b/awx/main/migrations/0124_execution_environments.py
@@ -4,12 +4,10 @@ from django.conf import settings
 from django.db import migrations, models
 import django.db.models.deletion
 import django.db.models.expressions
-import taggit.managers


 class Migration(migrations.Migration):
    dependencies = [
-        ('taggit', '0003_taggeditem_add_unique_index'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('main', '0123_drop_hg_support'),
    ]
@@ -69,12 +67,6 @@ class Migration(migrations.Migration):
                        to='main.Organization',
                    ),
                ),
-                (
-                    'tags',
-                    taggit.managers.TaggableManager(
-                        blank=True, help_text='A comma-separated list of tags.', through='taggit.TaggedItem', to='taggit.Tag', verbose_name='Tags'
-                    ),
-                ),
            ],
            options={
                'ordering': (django.db.models.expressions.OrderBy(django.db.models.expressions.F('organization_id'), nulls_first=True), 'image'),
--- a/awx/main/migrations/0185_move_JSONBlob_to_JSONField.py
+++ b/awx/main/migrations/0185_move_JSONBlob_to_JSONField.py
@@ -1,4 +1,4 @@
-# Generated by Django 4.2 on 2023-06-09 19:51
+# Generated by Django 4.2.3 on 2023-08-02 13:18

 import awx.main.models.notifications
 from django.db import migrations, models
@@ -11,16 +11,6 @@ class Migration(migrations.Migration):
    ]

    operations = [
-        migrations.AlterField(
-            model_name='activitystream',
-            name='deleted_actor',
-            field=models.JSONField(null=True),
-        ),
-        migrations.AlterField(
-            model_name='activitystream',
-            name='setting',
-            field=models.JSONField(blank=True, default=dict),
-        ),
        migrations.AlterField(
            model_name='instancegroup',
            name='policy_instance_list',
@@ -28,31 +18,11 @@ class Migration(migrations.Migration):
                blank=True, default=list, help_text='List of exact-match Instances that will always be automatically assigned to this group'
            ),
        ),
-        migrations.AlterField(
-            model_name='job',
-            name='survey_passwords',
-            field=models.JSONField(blank=True, default=dict, editable=False),
-        ),
-        migrations.AlterField(
-            model_name='joblaunchconfig',
-            name='char_prompts',
-            field=models.JSONField(blank=True, default=dict),
-        ),
-        migrations.AlterField(
-            model_name='joblaunchconfig',
-            name='survey_passwords',
-            field=models.JSONField(blank=True, default=dict, editable=False),
-        ),
        migrations.AlterField(
            model_name='jobtemplate',
            name='survey_spec',
            field=models.JSONField(blank=True, default=dict),
        ),
-        migrations.AlterField(
-            model_name='notification',
-            name='body',
-            field=models.JSONField(blank=True, default=dict),
-        ),
        migrations.AlterField(
            model_name='notificationtemplate',
            name='messages',
@@ -94,31 +64,6 @@ class Migration(migrations.Migration):
            name='survey_passwords',
            field=models.JSONField(blank=True, default=dict, editable=False),
        ),
-        migrations.AlterField(
-            model_name='unifiedjob',
-            name='job_env',
-            field=models.JSONField(blank=True, default=dict, editable=False),
-        ),
-        migrations.AlterField(
-            model_name='workflowjob',
-            name='char_prompts',
-            field=models.JSONField(blank=True, default=dict),
-        ),
-        migrations.AlterField(
-            model_name='workflowjob',
-            name='survey_passwords',
-            field=models.JSONField(blank=True, default=dict, editable=False),
-        ),
-        migrations.AlterField(
-            model_name='workflowjobnode',
-            name='char_prompts',
-            field=models.JSONField(blank=True, default=dict),
-        ),
-        migrations.AlterField(
-            model_name='workflowjobnode',
-            name='survey_passwords',
-            field=models.JSONField(blank=True, default=dict, editable=False),
-        ),
        migrations.AlterField(
            model_name='workflowjobtemplate',
            name='char_prompts',
@@ -139,4 +84,194 @@ class Migration(migrations.Migration):
            name='survey_passwords',
            field=models.JSONField(blank=True, default=dict, editable=False),
        ),
+        # These are potentially a problem.  Move the existing fields
+        # aside while pretending like they've been deleted, then add
+        # in fresh empty fields.  Make the old fields nullable where
+        # needed while we are at it, so that new rows don't hit
+        # IntegrityError.  We'll do the data migration out-of-band
+        # using a task.
+        migrations.RunSQL(  # Already nullable
+            "ALTER TABLE main_activitystream RENAME deleted_actor TO deleted_actor_old;",
+            state_operations=[
+                migrations.RemoveField(
+                    model_name='activitystream',
+                    name='deleted_actor',
+                ),
+            ],
+        ),
+        migrations.AddField(
+            model_name='activitystream',
+            name='deleted_actor',
+            field=models.JSONField(null=True),
+        ),
+        migrations.RunSQL(
+            """
+            ALTER TABLE main_activitystream RENAME setting TO setting_old;
+            ALTER TABLE main_activitystream ALTER COLUMN setting_old DROP NOT NULL;
+            """,
+            state_operations=[
+                migrations.RemoveField(
+                    model_name='activitystream',
+                    name='setting',
+                ),
+            ],
+        ),
+        migrations.AddField(
+            model_name='activitystream',
+            name='setting',
+            field=models.JSONField(blank=True, default=dict),
+        ),
+        migrations.RunSQL(
+            """
+            ALTER TABLE main_job RENAME survey_passwords TO survey_passwords_old;
+            ALTER TABLE main_job ALTER COLUMN survey_passwords_old DROP NOT NULL;
+            """,
+            state_operations=[
+                migrations.RemoveField(
+                    model_name='job',
+                    name='survey_passwords',
+                ),
+            ],
+        ),
+        migrations.AddField(
+            model_name='job',
+            name='survey_passwords',
+            field=models.JSONField(blank=True, default=dict, editable=False),
+        ),
+        migrations.RunSQL(
+            """
+            ALTER TABLE main_joblaunchconfig RENAME char_prompts TO char_prompts_old;
+            ALTER TABLE main_joblaunchconfig ALTER COLUMN char_prompts_old DROP NOT NULL;
+            """,
+            state_operations=[
+                migrations.RemoveField(
+                    model_name='joblaunchconfig',
+                    name='char_prompts',
+                ),
+            ],
+        ),
+        migrations.AddField(
+            model_name='joblaunchconfig',
+            name='char_prompts',
+            field=models.JSONField(blank=True, default=dict),
+        ),
+        migrations.RunSQL(
+            """
+            ALTER TABLE main_joblaunchconfig RENAME survey_passwords TO survey_passwords_old;
+            ALTER TABLE main_joblaunchconfig ALTER COLUMN survey_passwords_old DROP NOT NULL;
+            """,
+            state_operations=[
+                migrations.RemoveField(
+                    model_name='joblaunchconfig',
+                    name='survey_passwords',
+                ),
+            ],
+        ),
+        migrations.AddField(
+            model_name='joblaunchconfig',
+            name='survey_passwords',
+            field=models.JSONField(blank=True, default=dict, editable=False),
+        ),
+        migrations.RunSQL(
+            """
+            ALTER TABLE main_notification RENAME body TO body_old;
+            ALTER TABLE main_notification ALTER COLUMN body_old DROP NOT NULL;
+            """,
+            state_operations=[
+                migrations.RemoveField(
+                    model_name='notification',
+                    name='body',
+                ),
+            ],
+        ),
+        migrations.AddField(
+            model_name='notification',
+            name='body',
+            field=models.JSONField(blank=True, default=dict),
+        ),
+        migrations.RunSQL(
+            """
+            ALTER TABLE main_unifiedjob RENAME job_env TO job_env_old;
+            ALTER TABLE main_unifiedjob ALTER COLUMN job_env_old DROP NOT NULL;
+            """,
+            state_operations=[
+                migrations.RemoveField(
+                    model_name='unifiedjob',
+                    name='job_env',
+                ),
+            ],
+        ),
+        migrations.AddField(
+            model_name='unifiedjob',
+            name='job_env',
+            field=models.JSONField(blank=True, default=dict, editable=False),
+        ),
+        migrations.RunSQL(
+            """
+            ALTER TABLE main_workflowjob RENAME char_prompts TO char_prompts_old;
+            ALTER TABLE main_workflowjob ALTER COLUMN char_prompts_old DROP NOT NULL;
+            """,
+            state_operations=[
+                migrations.RemoveField(
+                    model_name='workflowjob',
+                    name='char_prompts',
+                ),
+            ],
+        ),
+        migrations.AddField(
+            model_name='workflowjob',
+            name='char_prompts',
+            field=models.JSONField(blank=True, default=dict),
+        ),
+        migrations.RunSQL(
+            """
+            ALTER TABLE main_workflowjob RENAME survey_passwords TO survey_passwords_old;
+            ALTER TABLE main_workflowjob ALTER COLUMN survey_passwords_old DROP NOT NULL;
+            """,
+            state_operations=[
+                migrations.RemoveField(
+                    model_name='workflowjob',
+                    name='survey_passwords',
+                ),
+            ],
+        ),
+        migrations.AddField(
+            model_name='workflowjob',
+            name='survey_passwords',
+            field=models.JSONField(blank=True, default=dict, editable=False),
+        ),
+        migrations.RunSQL(
+            """
+            ALTER TABLE main_workflowjobnode RENAME char_prompts TO char_prompts_old;
+            ALTER TABLE main_workflowjobnode ALTER COLUMN char_prompts_old DROP NOT NULL;
+            """,
+            state_operations=[
+                migrations.RemoveField(
+                    model_name='workflowjobnode',
+                    name='char_prompts',
+                ),
+            ],
+        ),
+        migrations.AddField(
+            model_name='workflowjobnode',
+            name='char_prompts',
+            field=models.JSONField(blank=True, default=dict),
+        ),
+        migrations.RunSQL(
+            """
+            ALTER TABLE main_workflowjobnode RENAME survey_passwords TO survey_passwords_old;
+            ALTER TABLE main_workflowjobnode ALTER COLUMN survey_passwords_old DROP NOT NULL;
+            """,
+            state_operations=[
+                migrations.RemoveField(
+                    model_name='workflowjobnode',
+                    name='survey_passwords',
+                ),
+            ],
+        ),
+        migrations.AddField(
+            model_name='workflowjobnode',
+            name='survey_passwords',
+            field=models.JSONField(blank=True, default=dict, editable=False),
+        ),
    ]
--- a/awx/main/migrations/0186_drop_django_taggit.py
+++ b/awx/main/migrations/0186_drop_django_taggit.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations
+from django.db.migrations.recorder import MigrationRecorder
+
+
+def delete_taggit_contenttypes(apps, schema_editor):
+    """Delete every ContentType row registered under the 'taggit' app label."""
+    ContentType = apps.get_model('contenttypes', 'ContentType')
+    ContentType.objects.filter(app_label='taggit').delete()
+
+
+def delete_taggit_migration_records(apps, schema_editor):
+    """Delete the recorded migration rows that belong to the 'taggit' app.
+
+    MigrationRecorder is imported explicitly from its submodule instead of being
+    reached through the ``migrations`` package attribute, which only resolves when
+    some other module has already imported ``django.db.migrations.recorder``.
+    """
+    recorder = MigrationRecorder(connection=schema_editor.connection)
+    recorder.migration_qs.filter(app='taggit').delete()
+
+
+class Migration(migrations.Migration):
+    # NOTE(review): none of the operations below supplies reverse_sql/reverse_code,
+    # so this migration cannot be unapplied.
+    dependencies = [
+        ('main', '0185_move_JSONBlob_to_JSONField'),
+    ]
+
+    operations = [
+        # Drop the taggit tables if they are present.
+        migrations.RunSQL("DROP TABLE IF EXISTS taggit_tag CASCADE;"),
+        migrations.RunSQL("DROP TABLE IF EXISTS taggit_taggeditem CASCADE;"),
+        # Then remove the rows that still refer to the taggit app.
+        migrations.RunPython(delete_taggit_contenttypes),
+        migrations.RunPython(delete_taggit_migration_records),
+    ]
--- a/awx/main/migrations/0187_hop_nodes.py
+++ b/awx/main/migrations/0187_hop_nodes.py
@@ -0,0 +1,75 @@
+# Generated by Django 4.2.3 on 2023-08-04 20:50
+
+import django.core.validators
+from django.db import migrations, models
+from django.conf import settings
+
+
+def automatically_peer_from_control_plane(apps, schema_editor):  # data step executed through RunPython in this migration's operations
+    if settings.IS_K8S:  # rows are only touched when settings.IS_K8S is truthy
+        Instance = apps.get_model('main', 'Instance')
+        Instance.objects.filter(node_type='execution').update(peers_from_control_nodes=True)  # execution nodes: set peers_from_control_nodes
+        Instance.objects.filter(node_type='control').update(listener_port=None)  # control nodes: clear listener_port
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ('main', '0186_drop_django_taggit'),
+    ]
+
+    operations = [
+        migrations.AlterModelOptions(
+            name='instancelink',
+            options={'ordering': ('id',)},
+        ),
+        migrations.AddField(
+            model_name='instance',
+            name='peers_from_control_nodes',
+            field=models.BooleanField(default=False, help_text='If True, control plane cluster nodes should automatically peer to it.'),
+        ),
+        migrations.AlterField(
+            model_name='instance',
+            name='ip_address',
+            field=models.CharField(blank=True, default='', max_length=50),
+        ),
+        migrations.AlterField(
+            model_name='instance',
+            name='listener_port',
+            field=models.PositiveIntegerField(
+                blank=True,
+                default=None,
+                help_text='Port that Receptor will listen for incoming connections on.',
+                null=True,
+                validators=[django.core.validators.MinValueValidator(1024), django.core.validators.MaxValueValidator(65535)],
+            ),
+        ),
+        migrations.AlterField(
+            model_name='instance',
+            name='peers',
+            field=models.ManyToManyField(related_name='peers_from', through='main.InstanceLink', to='main.instance'),
+        ),
+        migrations.AlterField(
+            model_name='instancelink',
+            name='link_state',
+            field=models.CharField(
+                choices=[('adding', 'Adding'), ('established', 'Established'), ('removing', 'Removing')],
+                default='adding',
+                help_text='Indicates the current life cycle stage of this peer link.',
+                max_length=16,
+            ),
+        ),
+        migrations.AddConstraint(  # ip_address must be unique, but only among rows where it is not the empty string
+            model_name='instance',
+            constraint=models.UniqueConstraint(
+                condition=models.Q(('ip_address', ''), _negated=True),
+                fields=('ip_address',),
+                name='unique_ip_address_not_empty',
+                violation_error_message='Field ip_address must be unique.',
+            ),
+        ),
+        migrations.AddConstraint(  # a link's source may not equal its target
+            model_name='instancelink',
+            constraint=models.CheckConstraint(check=models.Q(('source', models.F('target')), _negated=True), name='source_and_target_can_not_be_equal'),
+        ),
+        migrations.RunPython(automatically_peer_from_control_plane),  # no reverse_code is supplied for this data step
+    ]
--- a/awx/main/models/init.py
+++ b/awx/main/models/init.py
@@ -3,6 +3,7 @@

 # Django
 from django.conf import settings  # noqa
+from django.db import connection
 from django.db.models.signals import pre_delete  # noqa

 # AWX
@@ -99,6 +100,58 @@ User.add_to_class('can_access_with_errors', check_user_access_with_errors)
 User.add_to_class('accessible_objects', user_accessible_objects)


+def convert_jsonfields():  # queues migrate_jsonfield for every listed table that still has a '<column>_old' column
+    if connection.vendor != 'postgresql':
+        return  # only PostgreSQL connections are processed
+
+    # fmt: off
+    fields = [  # each entry: (table, primary-key column, column names), unpacked by the loop below
+        ('main_activitystream', 'id', (
+            'deleted_actor',
+            'setting',
+        )),
+        ('main_job', 'unifiedjob_ptr_id', (
+            'survey_passwords',
+        )),
+        ('main_joblaunchconfig', 'id', (
+            'char_prompts',
+            'survey_passwords',
+        )),
+        ('main_notification', 'id', (
+            'body',
+        )),
+        ('main_unifiedjob', 'id', (
+            'job_env',
+        )),
+        ('main_workflowjob', 'unifiedjob_ptr_id', (
+            'char_prompts',
+            'survey_passwords',
+        )),
+        ('main_workflowjobnode', 'id', (
+            'char_prompts',
+            'survey_passwords',
+        )),
+    ]
+    # fmt: on
+
+    with connection.cursor() as cursor:
+        for table, pkfield, columns in fields:
+            # Do the renamed old columns still exist?  If so, run the task.
+            old_columns = ','.join(f"'{column}_old'" for column in columns)  # quoted SQL literals built from the constants above, not from user input
+            cursor.execute(
+                f"""
+                select count(1) from information_schema.columns
+                where
+                  table_name = %s and column_name in ({old_columns});
+                """,
+                (table,),
+            )
+            if cursor.fetchone()[0]:  # non-zero count: at least one *_old column is still present
+                from awx.main.tasks.system import migrate_jsonfield
+
+                migrate_jsonfield.apply_async([table, pkfield, columns])
+
+
 def cleanup_created_modified_by(sender, **kwargs):
    # work around a bug in django-polymorphic that doesn't properly
    # handle cascades for reverse foreign keys on the polymorphic base model
--- a/awx/main/models/base.py
+++ b/awx/main/models/base.py
@@ -7,9 +7,6 @@ from django.core.exceptions import ValidationError, ObjectDoesNotExist
 from django.utils.translation import gettext_lazy as _
 from django.utils.timezone import now

-# Django-Taggit
-from taggit.managers import TaggableManager
-
 # Django-CRUM
 from crum import get_current_user

@@ -301,8 +298,6 @@ class PrimordialModel(HasEditsMixin, CreatedModifiedModel):
        on_delete=models.SET_NULL,
    )

-    tags = TaggableManager(blank=True)
-
    def __init__(self, *args, **kwargs):
        r = super(PrimordialModel, self).__init__(*args, **kwargs)
        if self.pk:
--- a/awx/main/models/credential/init.py
+++ b/awx/main/models/credential/init.py
@@ -17,6 +17,7 @@ from jinja2 import sandbox
 from django.db import models
 from django.utils.translation import gettext_lazy as _, gettext_noop
 from django.core.exceptions import ValidationError
+from django.conf import settings
 from django.utils.encoding import force_str
 from django.utils.functional import cached_property
 from django.utils.timezone import now
@@ -30,7 +31,7 @@ from awx.main.fields import (
    CredentialTypeInjectorField,
    DynamicCredentialInputField,
 )
-from awx.main.utils import decrypt_field, classproperty
+from awx.main.utils import decrypt_field, classproperty, set_environ
 from awx.main.utils.safe_yaml import safe_dump
 from awx.main.utils.execution_environments import to_container_path
 from awx.main.validators import validate_ssh_private_key
@@ -1252,7 +1253,9 @@ class CredentialInputSource(PrimordialModel):
                backend_kwargs[field_name] = value

        backend_kwargs.update(self.metadata)
-        return backend(**backend_kwargs)
+
+        with set_environ(**settings.AWX_TASK_ENV):
+            return backend(**backend_kwargs)

    def get_absolute_url(self, request=None):
        view_name = 'api:credential_input_source_detail'
--- a/awx/main/models/ha.py
+++ b/awx/main/models/ha.py
@@ -12,13 +12,14 @@ from django.dispatch import receiver
 from django.utils.translation import gettext_lazy as _
 from django.conf import settings
 from django.utils.timezone import now, timedelta
-from django.db.models import Sum
+from django.db.models import Sum, Q

 import redis
 from solo.models import SingletonModel

 # AWX
 from awx import __version__ as awx_application_version
+from awx.main.utils import is_testing
 from awx.api.versioning import reverse
 from awx.main.fields import ImplicitRoleField
 from awx.main.managers import InstanceManager, UUID_DEFAULT
@@ -70,16 +71,33 @@ class InstanceLink(BaseModel):
        REMOVING = 'removing', _('Removing')

    link_state = models.CharField(
-        choices=States.choices, default=States.ESTABLISHED, max_length=16, help_text=_("Indicates the current life cycle stage of this peer link.")
+        choices=States.choices, default=States.ADDING, max_length=16, help_text=_("Indicates the current life cycle stage of this peer link.")
    )

    class Meta:
        unique_together = ('source', 'target')
+        ordering = ("id",)
+        constraints = [models.CheckConstraint(check=~models.Q(source=models.F('target')), name='source_and_target_can_not_be_equal')]


 class Instance(HasPolicyEditsMixin, BaseModel):
    """A model representing an AWX instance running against this database."""

+    class Meta:
+        app_label = 'main'
+        ordering = ("hostname",)
+        constraints = [
+            models.UniqueConstraint(
+                fields=["ip_address"],
+                condition=~Q(ip_address=""),  # don't apply the constraint to empty entries
+                name="unique_ip_address_not_empty",
+                violation_error_message=_("Field ip_address must be unique."),
+            )
+        ]
+
+    def __str__(self):
+        return self.hostname
+
    objects = InstanceManager()

    # Fields set in instance registration
@@ -87,10 +105,8 @@ class Instance(HasPolicyEditsMixin, BaseModel):
    hostname = models.CharField(max_length=250, unique=True)
    ip_address = models.CharField(
        blank=True,
-        null=True,
-        default=None,
+        default="",
        max_length=50,
-        unique=True,
    )
    # Auto-fields, implementation is different from BaseModel
    created = models.DateTimeField(auto_now_add=True)
@@ -169,16 +185,14 @@ class Instance(HasPolicyEditsMixin, BaseModel):
    )
    listener_port = models.PositiveIntegerField(
        blank=True,
-        default=27199,
-        validators=[MinValueValidator(1), MaxValueValidator(65535)],
+        null=True,
+        default=None,
+        validators=[MinValueValidator(1024), MaxValueValidator(65535)],
        help_text=_("Port that Receptor will listen for incoming connections on."),
    )

-    peers = models.ManyToManyField('self', symmetrical=False, through=InstanceLink, through_fields=('source', 'target'))
-
-    class Meta:
-        app_label = 'main'
-        ordering = ("hostname",)
+    peers = models.ManyToManyField('self', symmetrical=False, through=InstanceLink, through_fields=('source', 'target'), related_name='peers_from')
+    peers_from_control_nodes = models.BooleanField(default=False, help_text=_("If True, control plane cluster nodes should automatically peer to it."))

    POLICY_FIELDS = frozenset(('managed_by_policy', 'hostname', 'capacity_adjustment'))

@@ -275,10 +289,14 @@ class Instance(HasPolicyEditsMixin, BaseModel):
        if update_last_seen:
            update_fields += ['last_seen']
        if perform_save:
-            self.save(update_fields=update_fields)
+            from awx.main.signals import disable_activity_stream
+
+            with disable_activity_stream():
+                self.save(update_fields=update_fields)
        return update_fields

    def set_capacity_value(self):
-        """Sets capacity according to capacity adjustment rule (no save)"""
+        """Sets capacity according to capacity adjustment rule (no save); returns True if the integer capacity changed"""
+        old_val = self.capacity
        if self.enabled and self.node_type != 'hop':
            lower_cap = min(self.mem_capacity, self.cpu_capacity)
@@ -286,6 +304,7 @@ class Instance(HasPolicyEditsMixin, BaseModel):
            self.capacity = lower_cap + (higher_cap - lower_cap) * self.capacity_adjustment
        else:
            self.capacity = 0
+        return int(self.capacity) != int(old_val)  # return True if value changed

    def refresh_capacity_fields(self):
        """Update derived capacity fields from cpu and memory (no save)"""
@@ -293,8 +312,8 @@ class Instance(HasPolicyEditsMixin, BaseModel):
            self.cpu_capacity = 0
            self.mem_capacity = 0  # formula has a non-zero offset, so we make sure it is 0 for hop nodes
        else:
-            self.cpu_capacity = get_cpu_effective_capacity(self.cpu)
-            self.mem_capacity = get_mem_effective_capacity(self.memory)
+            self.cpu_capacity = get_cpu_effective_capacity(self.cpu, is_control_node=bool(self.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID)))
+            self.mem_capacity = get_mem_effective_capacity(self.memory, is_control_node=bool(self.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID)))
        self.set_capacity_value()

    def save_health_data(self, version=None, cpu=0, memory=0, uuid=None, update_last_seen=False, errors=''):
@@ -317,12 +336,17 @@ class Instance(HasPolicyEditsMixin, BaseModel):
            self.version = version
            update_fields.append('version')

-        new_cpu = get_corrected_cpu(cpu)
+        if self.node_type == Instance.Types.EXECUTION:
+            new_cpu = cpu
+            new_memory = memory
+        else:
+            new_cpu = get_corrected_cpu(cpu)
+            new_memory = get_corrected_memory(memory)
+
        if new_cpu != self.cpu:
            self.cpu = new_cpu
            update_fields.append('cpu')

-        new_memory = get_corrected_memory(memory)
        if new_memory != self.memory:
            self.memory = new_memory
            update_fields.append('memory')
@@ -464,21 +488,50 @@ def on_instance_group_saved(sender, instance, created=False, raw=False, **kwargs
        instance.set_default_policy_fields()


+def schedule_write_receptor_config(broadcast=True):  # broadcast=True queues the task on 'tower_broadcast_all'; False calls it in-process
+    from awx.main.tasks.receptor import write_receptor_config  # prevents circular import
+
+    # broadcast to all control instances to update their receptor configs
+    if broadcast:
+        connection.on_commit(lambda: write_receptor_config.apply_async(queue='tower_broadcast_all'))  # deferred until the transaction commits
+    else:
+        if not is_testing():  # the local call is skipped when is_testing() returns true
+            write_receptor_config()  # just run locally
+
+
@receiver(post_save, sender=Instance)
 def on_instance_saved(sender, instance, created=False, raw=False, **kwargs):
-    if settings.IS_K8S and instance.node_type in (Instance.Types.EXECUTION,):
+    '''
+    Here we link control nodes to hop or execution nodes based on the
+    peers_from_control_nodes field.
+    write_receptor_config should be called on each control node when:
+    1. new node is created with peers_from_control_nodes enabled
+    2. a node changes its value of peers_from_control_nodes
+    3. a new control node comes online and has instances to peer to
+    '''
+    if created and settings.IS_K8S and instance.node_type in [Instance.Types.CONTROL, Instance.Types.HYBRID]:
+        inst = Instance.objects.filter(peers_from_control_nodes=True)
+        if set(instance.peers.all()) != set(inst):
+            instance.peers.set(inst)
+            schedule_write_receptor_config(broadcast=False)
+
+    if settings.IS_K8S and instance.node_type in [Instance.Types.HOP, Instance.Types.EXECUTION]:
        if instance.node_state == Instance.States.DEPROVISIONING:
            from awx.main.tasks.receptor import remove_deprovisioned_node  # prevents circular import

            # wait for jobs on the node to complete, then delete the
            # node and kick off write_receptor_config
            connection.on_commit(lambda: remove_deprovisioned_node.apply_async([instance.hostname]))
-
-        if instance.node_state == Instance.States.INSTALLED:
-            from awx.main.tasks.receptor import write_receptor_config  # prevents circular import
-
-            # broadcast to all control instances to update their receptor configs
-            connection.on_commit(lambda: write_receptor_config.apply_async(queue='tower_broadcast_all'))
+        else:
+            control_instances = set(Instance.objects.filter(node_type__in=[Instance.Types.CONTROL, Instance.Types.HYBRID]))
+            if instance.peers_from_control_nodes:
+                if (control_instances & set(instance.peers_from.all())) != set(control_instances):
+                    instance.peers_from.add(*control_instances)
+                    schedule_write_receptor_config()  # keep method separate to make pytest mocking easier
+            else:
+                if set(control_instances) & set(instance.peers_from.all()):
+                    instance.peers_from.remove(*control_instances)
+                    schedule_write_receptor_config()

    if created or instance.has_policy_changes():
        schedule_policy_task()
@@ -493,6 +546,8 @@ def on_instance_group_deleted(sender, instance, using, **kwargs):
@receiver(post_delete, sender=Instance)
 def on_instance_deleted(sender, instance, using, **kwargs):
    schedule_policy_task()
+    if settings.IS_K8S and instance.node_type in (Instance.Types.EXECUTION, Instance.Types.HOP) and instance.peers_from_control_nodes:
+        schedule_write_receptor_config()


 class UnifiedJobTemplateInstanceGroupMembership(models.Model):
--- a/awx/main/models/inventory.py
+++ b/awx/main/models/inventory.py
@@ -10,7 +10,6 @@ import copy
 import os.path
 from urllib.parse import urljoin

-import dateutil.relativedelta
 import yaml

 # Django
@@ -890,27 +889,10 @@ class HostMetric(models.Model):
            self.deleted = False
            self.save(update_fields=['deleted'])

-    @classmethod
-    def cleanup_task(cls, months_ago):
-        try:
-            months_ago = int(months_ago)
-            if months_ago <= 0:
-                raise ValueError()
-
-            last_automation_before = now() - dateutil.relativedelta.relativedelta(months=months_ago)
-
-            logger.info(f'Cleanup [HostMetric]: soft-deleting records last automated before {last_automation_before}')
-            HostMetric.active_objects.filter(last_automation__lt=last_automation_before).update(
-                deleted=True, deleted_counter=models.F('deleted_counter') + 1, last_deleted=now()
-            )
-            settings.CLEANUP_HOST_METRICS_LAST_TS = now()
-        except (TypeError, ValueError):
-            logger.error(f"Cleanup [HostMetric]: months_ago({months_ago}) has to be a positive integer value")
-

 class HostMetricSummaryMonthly(models.Model):
    """
-    HostMetric summaries computed by scheduled task <TODO> monthly
+    HostMetric summaries computed by scheduled task 'awx.main.tasks.system.host_metric_summary_monthly' monthly
    """

    date = models.DateField(unique=True)
--- a/awx/main/models/unified_jobs.py
+++ b/awx/main/models/unified_jobs.py
@@ -1439,6 +1439,11 @@ class UnifiedJob(
        if not self.celery_task_id:
            return
        canceled = []
+        if not connection.get_autocommit():
+            # this condition is purpose-written for the task manager, when it cancels jobs in workflows
+            ControlDispatcher('dispatcher', self.controller_node).cancel([self.celery_task_id], with_reply=False)
+            return True  # task manager itself needs to act under assumption that cancel was received
+
        try:
            # Use control and reply mechanism to cancel and obtain confirmation
            timeout = 5
--- a/awx/main/models/workflow.py
+++ b/awx/main/models/workflow.py
@@ -661,7 +661,11 @@ class WorkflowJob(UnifiedJob, WorkflowJobOptions, SurveyJobMixin, JobNotificatio

    @property
    def event_processing_finished(self):
-        return True
+        return True  # workflow jobs do not have events
+
+    @property
+    def has_unpartitioned_events(self):
+        return False  # workflow jobs do not have events

    def _get_parent_field_name(self):
        if self.job_template_id:
@@ -914,7 +918,11 @@ class WorkflowApproval(UnifiedJob, JobNotificationMixin):

    @property
    def event_processing_finished(self):
-        return True
+        return True  # approval jobs do not have events
+
+    @property
+    def has_unpartitioned_events(self):
+        return False  # approval jobs do not have events

    def send_approval_notification(self, approval_status):
        from awx.main.tasks.system import send_notifications  # avoid circular import
--- a/awx/main/registrar.py
+++ b/awx/main/registrar.py
@@ -3,8 +3,6 @@

 from django.db.models.signals import pre_save, post_save, pre_delete, m2m_changed

-from taggit.managers import TaggableManager
-

 class ActivityStreamRegistrar(object):
    def __init__(self):
@@ -21,8 +19,6 @@ class ActivityStreamRegistrar(object):
            pre_delete.connect(activity_stream_delete, sender=model, dispatch_uid=str(self.__class__) + str(model) + "_delete")

            for m2mfield in model._meta.many_to_many:
-                if isinstance(m2mfield, TaggableManager):
-                    continue  # Special case for taggit app
                try:
                    m2m_attr = getattr(model, m2mfield.name)
                    m2m_changed.connect(
--- a/awx/main/scheduler/task_manager.py
+++ b/awx/main/scheduler/task_manager.py
@@ -25,7 +25,6 @@ from awx.main.models import (
    InventoryUpdate,
    Job,
    Project,
-    ProjectUpdate,
    UnifiedJob,
    WorkflowApproval,
    WorkflowJob,
@@ -102,27 +101,40 @@ class TaskBase:

    def record_aggregate_metrics(self, *args):
        if not is_testing():
-            # increment task_manager_schedule_calls regardless if the other
-            # metrics are recorded
-            s_metrics.Metrics(auto_pipe_execute=True).inc(f"{self.prefix}__schedule_calls", 1)
-            # Only record metrics if the last time recording was more
-            # than SUBSYSTEM_METRICS_TASK_MANAGER_RECORD_INTERVAL ago.
-            # Prevents a short-duration task manager that runs directly after a
-            # long task manager to override useful metrics.
-            current_time = time.time()
-            time_last_recorded = current_time - self.subsystem_metrics.decode(f"{self.prefix}_recorded_timestamp")
-            if time_last_recorded > settings.SUBSYSTEM_METRICS_TASK_MANAGER_RECORD_INTERVAL:
-                logger.debug(f"recording {self.prefix} metrics, last recorded {time_last_recorded} seconds ago")
-                self.subsystem_metrics.set(f"{self.prefix}_recorded_timestamp", current_time)
-                self.subsystem_metrics.pipe_execute()
-            else:
-                logger.debug(f"skipping recording {self.prefix} metrics, last recorded {time_last_recorded} seconds ago")
+            try:
+                # increment task_manager_schedule_calls regardless if the other
+                # metrics are recorded
+                s_metrics.Metrics(auto_pipe_execute=True).inc(f"{self.prefix}__schedule_calls", 1)
+                # Only record metrics if the last time recording was more
+                # than SUBSYSTEM_METRICS_TASK_MANAGER_RECORD_INTERVAL ago.
+                # Prevents a short-duration task manager that runs directly after a
+                # long task manager to override useful metrics.
+                current_time = time.time()
+                time_last_recorded = current_time - self.subsystem_metrics.decode(f"{self.prefix}_recorded_timestamp")
+                if time_last_recorded > settings.SUBSYSTEM_METRICS_TASK_MANAGER_RECORD_INTERVAL:
+                    logger.debug(f"recording {self.prefix} metrics, last recorded {time_last_recorded} seconds ago")
+                    self.subsystem_metrics.set(f"{self.prefix}_recorded_timestamp", current_time)
+                    self.subsystem_metrics.pipe_execute()
+                else:
+                    logger.debug(f"skipping recording {self.prefix} metrics, last recorded {time_last_recorded} seconds ago")
+            except Exception:
+                logger.exception(f"Error saving metrics for {self.prefix}")

    def record_aggregate_metrics_and_exit(self, *args):
        self.record_aggregate_metrics()
        sys.exit(1)

+    def get_local_metrics(self):
+        data = {}
+        for k, metric in self.subsystem_metrics.METRICS.items():
+            if k.startswith(self.prefix) and metric.metric_has_changed:
+                data[k[len(self.prefix) + 1 :]] = metric.current_value
+        return data
+
    def schedule(self):
+        # Always be able to restore the original signal handler if we finish
+        original_sigusr1 = signal.getsignal(signal.SIGUSR1)
+
        # Lock
        with task_manager_bulk_reschedule():
            with advisory_lock(f"{self.prefix}_lock", wait=False) as acquired:
@@ -131,15 +143,24 @@ class TaskBase:
                        logger.debug(f"Not running {self.prefix} scheduler, another task holds lock")
                        return
                    logger.debug(f"Starting {self.prefix} Scheduler")
-                    # if sigterm due to timeout, still record metrics
-                    signal.signal(signal.SIGTERM, self.record_aggregate_metrics_and_exit)
-                    self._schedule()
+                    # if sigusr1 due to timeout, still record metrics
+                    signal.signal(signal.SIGUSR1, self.record_aggregate_metrics_and_exit)
+                    try:
+                        self._schedule()
+                    finally:
+                        # Restore the previously installed SIGUSR1 handler just in case anything
+                        # else uses the same signal for other purposes
+                        signal.signal(signal.SIGUSR1, original_sigusr1)
                    commit_start = time.time()

+                    logger.debug(f"Commiting {self.prefix} Scheduler changes")
+
                if self.prefix == "task_manager":
                    self.subsystem_metrics.set(f"{self.prefix}_commit_seconds", time.time() - commit_start)
+                local_metrics = self.get_local_metrics()
                self.record_aggregate_metrics()
-                logger.debug(f"Finishing {self.prefix} Scheduler")
+
+                logger.debug(f"Finished {self.prefix} Scheduler, timing data:\n{local_metrics}")


 class WorkflowManager(TaskBase):
@@ -154,7 +175,6 @@ class WorkflowManager(TaskBase):
                logger.warning("Workflow manager has reached time out while processing running workflows, exiting loop early")
                ScheduleWorkflowManager().schedule()
                # Do not process any more workflow jobs. Stop here.
-                # Maybe we should schedule another WorkflowManager run
                break
            dag = WorkflowDAG(workflow_job)
            status_changed = False
@@ -169,8 +189,8 @@ class WorkflowManager(TaskBase):
                    workflow_job.save(update_fields=['status', 'start_args'])
                    status_changed = True
            else:
-                workflow_nodes = dag.mark_dnr_nodes()
-                WorkflowJobNode.objects.bulk_update(workflow_nodes, ['do_not_run'])
+                dnr_nodes = dag.mark_dnr_nodes()
+                WorkflowJobNode.objects.bulk_update(dnr_nodes, ['do_not_run'])
                # If workflow is now done, we do special things to mark it as done.
                is_done = dag.is_workflow_done()
                if is_done:
@@ -250,6 +270,10 @@ class WorkflowManager(TaskBase):
                        job.status = 'failed'
                        job.save(update_fields=['status', 'job_explanation'])
                        job.websocket_emit_status('failed')
+                        # NOTE: sending notification templates here is slightly worse performance
+                        # this is not yet optimized in the same way as for the TaskManager
+                        job.send_notification_templates('failed')
+                        ScheduleWorkflowManager().schedule()

                    # TODO: should we emit a status on the socket here similar to tasks.py awx_periodic_scheduler() ?
                    # emit_websocket_notification('/socket.io/jobs', '', dict(id=))
@@ -270,184 +294,115 @@ class WorkflowManager(TaskBase):
 class DependencyManager(TaskBase):
    def __init__(self):
        super().__init__(prefix="dependency_manager")
+        self.all_projects = {}
+        self.all_inventory_sources = {}

-    def create_project_update(self, task, project_id=None):
-        if project_id is None:
-            project_id = task.project_id
-        project_task = Project.objects.get(id=project_id).create_project_update(_eager_fields=dict(launch_type='dependency'))
-
-        # Project created 1 seconds behind
-        project_task.created = task.created - timedelta(seconds=1)
-        project_task.status = 'pending'
-        project_task.save()
-        logger.debug('Spawned {} as dependency of {}'.format(project_task.log_format, task.log_format))
-        return project_task
-
-    def create_inventory_update(self, task, inventory_source_task):
-        inventory_task = InventorySource.objects.get(id=inventory_source_task.id).create_inventory_update(_eager_fields=dict(launch_type='dependency'))
-
-        inventory_task.created = task.created - timedelta(seconds=2)
-        inventory_task.status = 'pending'
-        inventory_task.save()
-        logger.debug('Spawned {} as dependency of {}'.format(inventory_task.log_format, task.log_format))
-
-        return inventory_task
-
-    def add_dependencies(self, task, dependencies):
-        with disable_activity_stream():
-            task.dependent_jobs.add(*dependencies)
-
-    def get_inventory_source_tasks(self):
+    def cache_projects_and_sources(self, task_list):
+        project_ids = set()
        inventory_ids = set()
-        for task in self.all_tasks:
+        for task in task_list:
            if isinstance(task, Job):
-                inventory_ids.add(task.inventory_id)
-        self.all_inventory_sources = [invsrc for invsrc in InventorySource.objects.filter(inventory_id__in=inventory_ids, update_on_launch=True)]
+                if task.project_id:
+                    project_ids.add(task.project_id)
+                if task.inventory_id:
+                    inventory_ids.add(task.inventory_id)
+            elif isinstance(task, InventoryUpdate):
+                if task.inventory_source and task.inventory_source.source_project_id:
+                    project_ids.add(task.inventory_source.source_project_id)

-    def get_latest_inventory_update(self, inventory_source):
-        latest_inventory_update = InventoryUpdate.objects.filter(inventory_source=inventory_source).order_by("-created")
-        if not latest_inventory_update.exists():
-            return None
-        return latest_inventory_update.first()
+        for proj in Project.objects.filter(id__in=project_ids, scm_update_on_launch=True):
+            self.all_projects[proj.id] = proj

-    def should_update_inventory_source(self, job, latest_inventory_update):
-        now = tz_now()
+        for invsrc in InventorySource.objects.filter(inventory_id__in=inventory_ids, update_on_launch=True):
+            self.all_inventory_sources.setdefault(invsrc.inventory_id, [])
+            self.all_inventory_sources[invsrc.inventory_id].append(invsrc)

-        if latest_inventory_update is None:
+    @staticmethod
+    def should_update_again(update, cache_timeout):
+        '''
+        If it has never updated, we need to update
+        If there is already an update in progress then we do not need to create a new one
+        If the last update failed, we always need to try and update again
+        If current time is more than cache_timeout after last update, then we need a new one
+        '''
+        if (update is None) or (update.status in ['failed', 'canceled', 'error']):
            return True
-        '''
-        If there's already a inventory update utilizing this job that's about to run
-        then we don't need to create one
-        '''
-        if latest_inventory_update.status in ['waiting', 'pending', 'running']:
+        if update.status in ['waiting', 'pending', 'running']:
            return False

-        timeout_seconds = timedelta(seconds=latest_inventory_update.inventory_source.update_cache_timeout)
-        if (latest_inventory_update.finished + timeout_seconds) < now:
-            return True
-        if latest_inventory_update.inventory_source.update_on_launch is True and latest_inventory_update.status in ['failed', 'canceled', 'error']:
-            return True
-        return False
+        return bool(((update.finished + timedelta(seconds=cache_timeout))) < tz_now())

-    def get_latest_project_update(self, project_id):
-        latest_project_update = ProjectUpdate.objects.filter(project=project_id, job_type='check').order_by("-created")
-        if not latest_project_update.exists():
-            return None
-        return latest_project_update.first()
-
-    def should_update_related_project(self, job, latest_project_update):
-        now = tz_now()
-
-        if latest_project_update is None:
-            return True
-
-        if latest_project_update.status in ['failed', 'canceled']:
-            return True
-
-        '''
-        If there's already a project update utilizing this job that's about to run
-        then we don't need to create one
-        '''
-        if latest_project_update.status in ['waiting', 'pending', 'running']:
-            return False
-
-        '''
-        If the latest project update has a created time == job_created_time-1
-        then consider the project update found. This is so we don't enter an infinite loop
-        of updating the project when cache timeout is 0.
-        '''
-        if (
-            latest_project_update.project.scm_update_cache_timeout == 0
-            and latest_project_update.launch_type == 'dependency'
-            and latest_project_update.created == job.created - timedelta(seconds=1)
-        ):
-            return False
-        '''
-        Normal Cache Timeout Logic
-        '''
-        timeout_seconds = timedelta(seconds=latest_project_update.project.scm_update_cache_timeout)
-        if (latest_project_update.finished + timeout_seconds) < now:
-            return True
-        return False
+    def get_or_create_project_update(self, project_id):
+        project = self.all_projects.get(project_id, None)
+        if project is not None:
+            latest_project_update = project.project_updates.filter(job_type='check').order_by("-created").first()
+            if self.should_update_again(latest_project_update, project.scm_update_cache_timeout):
+                project_task = project.create_project_update(_eager_fields=dict(launch_type='dependency'))
+                project_task.signal_start()
+                return [project_task]
+            else:
+                return [latest_project_update]
+        return []

    def gen_dep_for_job(self, task):
-        created_dependencies = []
-        dependencies = []
-        # TODO: Can remove task.project None check after scan-job-default-playbook is removed
-        if task.project is not None and task.project.scm_update_on_launch is True:
-            latest_project_update = self.get_latest_project_update(task.project_id)
-            if self.should_update_related_project(task, latest_project_update):
-                latest_project_update = self.create_project_update(task)
-                created_dependencies.append(latest_project_update)
-            dependencies.append(latest_project_update)
+        dependencies = self.get_or_create_project_update(task.project_id)

-        # Inventory created 2 seconds behind job
        try:
            start_args = json.loads(decrypt_field(task, field_name="start_args"))
        except ValueError:
            start_args = dict()
-        # generator for inventory sources related to this task
-        task_inv_sources = (invsrc for invsrc in self.all_inventory_sources if invsrc.inventory_id == task.inventory_id)
-        for inventory_source in task_inv_sources:
+        # iterate over the cached update-on-launch inventory sources related to this task
+        for inventory_source in self.all_inventory_sources.get(task.inventory_id, []):
            if "inventory_sources_already_updated" in start_args and inventory_source.id in start_args['inventory_sources_already_updated']:
                continue
-            if not inventory_source.update_on_launch:
-                continue
-            latest_inventory_update = self.get_latest_inventory_update(inventory_source)
-            if self.should_update_inventory_source(task, latest_inventory_update):
-                inventory_task = self.create_inventory_update(task, inventory_source)
-                created_dependencies.append(inventory_task)
+            latest_inventory_update = inventory_source.inventory_updates.order_by("-created").first()
+            if self.should_update_again(latest_inventory_update, inventory_source.update_cache_timeout):
+                inventory_task = inventory_source.create_inventory_update(_eager_fields=dict(launch_type='dependency'))
+                inventory_task.signal_start()
                dependencies.append(inventory_task)
            else:
                dependencies.append(latest_inventory_update)

-        if dependencies:
-            self.add_dependencies(task, dependencies)
-
-        return created_dependencies
+        return dependencies

    def gen_dep_for_inventory_update(self, inventory_task):
-        created_dependencies = []
        if inventory_task.source == "scm":
            invsrc = inventory_task.inventory_source
-            if not invsrc.source_project.scm_update_on_launch:
-                return created_dependencies
-
-            latest_src_project_update = self.get_latest_project_update(invsrc.source_project_id)
-            if self.should_update_related_project(inventory_task, latest_src_project_update):
-                latest_src_project_update = self.create_project_update(inventory_task, project_id=invsrc.source_project_id)
-                created_dependencies.append(latest_src_project_update)
-            self.add_dependencies(inventory_task, [latest_src_project_update])
-            latest_src_project_update.scm_inventory_updates.add(inventory_task)
-        return created_dependencies
+            if invsrc:
+                return self.get_or_create_project_update(invsrc.source_project_id)
+        return []

    @timeit
    def generate_dependencies(self, undeped_tasks):
-        created_dependencies = []
+        dependencies = []
+        self.cache_projects_and_sources(undeped_tasks)
        for task in undeped_tasks:
            task.log_lifecycle("acknowledged")
            if type(task) is Job:
-                created_dependencies += self.gen_dep_for_job(task)
+                job_deps = self.gen_dep_for_job(task)
            elif type(task) is InventoryUpdate:
-                created_dependencies += self.gen_dep_for_inventory_update(task)
+                job_deps = self.gen_dep_for_inventory_update(task)
            else:
                continue
+            if job_deps:
+                dependencies += job_deps  # running total, returned so the caller can process deps-of-deps
+                with disable_activity_stream():
+                    task.dependent_jobs.add(*job_deps)  # link only this task's own deps, not those of earlier tasks
+                logger.debug(f'Linked {[dep.log_format for dep in job_deps]} as dependencies of {task.log_format}')
+
        UnifiedJob.objects.filter(pk__in=[task.pk for task in undeped_tasks]).update(dependencies_processed=True)

-        return created_dependencies
-
-    def process_tasks(self):
-        deps = self.generate_dependencies(self.all_tasks)
-        self.generate_dependencies(deps)
-        self.subsystem_metrics.inc(f"{self.prefix}_pending_processed", len(self.all_tasks) + len(deps))
+        return dependencies

    @timeit
    def _schedule(self):
        self.get_tasks(dict(status__in=["pending"], dependencies_processed=False))

        if len(self.all_tasks) > 0:
-            self.get_inventory_source_tasks()
-            self.process_tasks()
+            deps = self.generate_dependencies(self.all_tasks)
+            undeped_deps = [dep for dep in deps if dep.dependencies_processed is False]
+            self.generate_dependencies(undeped_deps)
+            self.subsystem_metrics.inc(f"{self.prefix}_pending_processed", len(self.all_tasks) + len(undeped_deps))
            ScheduleTaskManager().schedule()


@@ -478,6 +433,25 @@ class TaskManager(TaskBase):
        self.tm_models = TaskManagerModels()
        self.controlplane_ig = self.tm_models.instance_groups.controlplane_ig

+    def process_job_dep_failures(self, task):
+        """If job depends on a job that has failed, mark as failed and handle misc stuff."""
+        for dep in task.dependent_jobs.all():
+            # if we detect a failed or error dependency, go ahead and fail this task.
+            if dep.status in ("error", "failed"):
+                task.status = 'failed'
+                logger.warning(f'Previous task failed task: {task.id} dep: {dep.id} task manager')
+                task.job_explanation = 'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (
+                    get_type_for_model(type(dep)),
+                    dep.name,
+                    dep.id,
+                )
+                task.save(update_fields=['status', 'job_explanation'])
+                task.websocket_emit_status('failed')
+                self.pre_start_failed.append(task.id)
+                return True
+
+        return False
+
    def job_blocked_by(self, task):
        # TODO: I'm not happy with this, I think blocking behavior should be decided outside of the dependency graph
        #       in the old task manager this was handled as a method on each task object outside of the graph and
@@ -489,20 +463,6 @@ class TaskManager(TaskBase):
        for dep in task.dependent_jobs.all():
            if dep.status in ACTIVE_STATES:
                return dep
-            # if we detect a failed or error dependency, go ahead and fail this
-            # task. The errback on the dependency takes some time to trigger,
-            # and we don't want the task to enter running state if its
-            # dependency has failed or errored.
-            elif dep.status in ("error", "failed"):
-                task.status = 'failed'
-                task.job_explanation = 'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (
-                    get_type_for_model(type(dep)),
-                    dep.name,
-                    dep.id,
-                )
-                task.save(update_fields=['status', 'job_explanation'])
-                task.websocket_emit_status('failed')
-                return dep

        return None

@@ -522,7 +482,6 @@ class TaskManager(TaskBase):
        if self.start_task_limit == 0:
            # schedule another run immediately after this task manager
            ScheduleTaskManager().schedule()
-        from awx.main.tasks.system import handle_work_error, handle_work_success

        task.status = 'waiting'

@@ -533,7 +492,7 @@ class TaskManager(TaskBase):
                task.job_explanation += ' '
            task.job_explanation += 'Task failed pre-start check.'
            task.save()
-            # TODO: run error handler to fail sub-tasks and send notifications
+            self.pre_start_failed.append(task.id)
        else:
            if type(task) is WorkflowJob:
                task.status = 'running'
@@ -555,19 +514,16 @@ class TaskManager(TaskBase):
        # apply_async does a NOTIFY to the channel dispatcher is listening to
        # postgres will treat this as part of the transaction, which is what we want
        if task.status != 'failed' and type(task) is not WorkflowJob:
-            task_actual = {'type': get_type_for_model(type(task)), 'id': task.id}
            task_cls = task._get_task_class()
            task_cls.apply_async(
                [task.pk],
                opts,
                queue=task.get_queue_name(),
                uuid=task.celery_task_id,
-                callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
-                errbacks=[{'task': handle_work_error.name, 'kwargs': {'task_actual': task_actual}}],
            )

-        # In exception cases, like a job failing pre-start checks, we send the websocket status message
-        # for jobs going into waiting, we omit this because of performance issues, as it should go to running quickly
+        # In exception cases, like a job failing pre-start checks, we send the websocket status message.
+        # For jobs going into waiting, we omit this because of performance issues, as it should go to running quickly
        if task.status != 'waiting':
            task.websocket_emit_status(task.status)  # adds to on_commit

@@ -588,6 +544,11 @@ class TaskManager(TaskBase):
            if self.timed_out():
                logger.warning("Task manager has reached time out while processing pending jobs, exiting loop early")
                break
+
+            has_failed = self.process_job_dep_failures(task)
+            if has_failed:
+                continue
+
            blocked_by = self.job_blocked_by(task)
            if blocked_by:
                self.subsystem_metrics.inc(f"{self.prefix}_tasks_blocked", 1)
@@ -701,6 +662,11 @@ class TaskManager(TaskBase):
                reap_job(j, 'failed')

    def process_tasks(self):
+        # maintain a list of jobs that went to an early failure state,
+        # meaning the dispatcher never got these jobs,
+        # that means we have to handle notifications for those
+        self.pre_start_failed = []
+
        running_tasks = [t for t in self.all_tasks if t.status in ['waiting', 'running']]
        self.process_running_tasks(running_tasks)
        self.subsystem_metrics.inc(f"{self.prefix}_running_processed", len(running_tasks))
@@ -710,6 +676,11 @@ class TaskManager(TaskBase):
        self.process_pending_tasks(pending_tasks)
        self.subsystem_metrics.inc(f"{self.prefix}_pending_processed", len(pending_tasks))

+        if self.pre_start_failed:
+            from awx.main.tasks.system import handle_failure_notifications
+
+            handle_failure_notifications.delay(self.pre_start_failed)
+
    def timeout_approval_node(self, task):
        if self.timed_out():
            logger.warning("Task manager has reached time out while processing approval nodes, exiting loop early")
--- a/awx/main/tasks/init.py
+++ b/awx/main/tasks/init.py
@@ -1 +1 @@
-from . import jobs, receptor, system  # noqa
+from . import host_metrics, jobs, receptor, system  # noqa
--- a/awx/main/tasks/callback.py
+++ b/awx/main/tasks/callback.py
@@ -29,8 +29,9 @@ class RunnerCallback:
        self.safe_env = {}
        self.event_ct = 0
        self.model = model
-        self.update_attempts = int(settings.DISPATCHER_DB_DOWNTOWN_TOLLERANCE / 5)
+        self.update_attempts = int(settings.DISPATCHER_DB_DOWNTIME_TOLERANCE / 5)
        self.wrapup_event_dispatched = False
+        self.artifacts_processed = False
        self.extra_update_fields = {}

    def update_model(self, pk, _attempt=0, **updates):
@@ -207,9 +208,13 @@ class RunnerCallback:
            # We opened a connection just for that save, close it here now
            connections.close_all()
        elif status_data['status'] == 'error':
-            result_traceback = status_data.get('result_traceback', None)
-            if result_traceback:
-                self.delay_update(result_traceback=result_traceback)
+            for field_name in ('result_traceback', 'job_explanation'):
+                field_value = status_data.get(field_name, None)
+                if field_value:
+                    self.delay_update(**{field_name: field_value})
+
+    def artifacts_handler(self, artifact_dir):
+        self.artifacts_processed = True


 class RunnerCallbackForProjectUpdate(RunnerCallback):
--- a/awx/main/tasks/facts.py
+++ b/awx/main/tasks/facts.py
@@ -9,6 +9,7 @@ from django.conf import settings
 from django.db.models.query import QuerySet
 from django.utils.encoding import smart_str
 from django.utils.timezone import now
+from django.db import OperationalError

 # AWX
 from awx.main.utils.common import log_excess_runtime
@@ -57,6 +58,28 @@ def start_fact_cache(hosts, destination, log_data, timeout=None, inventory_id=No
    return None


+def raw_update_hosts(host_list):
+    Host.objects.bulk_update(host_list, ['ansible_facts', 'ansible_facts_modified'])
+
+
+def update_hosts(host_list, max_tries=5):
+    if not host_list:
+        return
+    for i in range(max_tries):
+        try:
+            raw_update_hosts(host_list)
+        except OperationalError as exc:
+            # Deadlocks can happen if this runs at the same time as another large query
+            # inventory updates and updating last_job_host_summary are candidates for conflict
+            # but these would resolve easily on a retry
+            if i + 1 < max_tries:
+                logger.info(f'OperationalError (suspected deadlock) saving host facts retry {i}, message: {exc}')
+                continue
+            else:
+                raise
+        break
+
+
@log_excess_runtime(
    logger,
    debug_cutoff=0.01,
@@ -111,7 +134,6 @@ def finish_fact_cache(hosts, destination, facts_write_time, log_data, job_id=Non
            system_tracking_logger.info('Facts cleared for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)))
            log_data['cleared_ct'] += 1
        if len(hosts_to_update) > 100:
-            Host.objects.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
+            update_hosts(hosts_to_update)
            hosts_to_update = []
-    if hosts_to_update:
-        Host.objects.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
+    update_hosts(hosts_to_update)
--- a/awx/main/tasks/helpers.py
+++ b/awx/main/tasks/helpers.py
@@ -0,0 +1,10 @@
+from django.utils.timezone import now
+from rest_framework.fields import DateTimeField
+
+
+def is_run_threshold_reached(setting, threshold_seconds):
+    last_time = DateTimeField().to_internal_value(setting) if setting else None
+    if not last_time:
+        return True
+    else:
+        return (now() - last_time).total_seconds() > threshold_seconds
--- a/awx/main/tasks/host_metrics.py
+++ b/awx/main/tasks/host_metrics.py
@@ -0,0 +1,262 @@
+import datetime
+from dateutil.relativedelta import relativedelta
+import logging
+
+from django.conf import settings
+from django.db.models import Count, F
+from django.db.models.functions import TruncMonth
+from django.utils.timezone import now
+from awx.main.dispatch import get_task_queuename
+from awx.main.dispatch.publish import task
+from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly
+from awx.main.tasks.helpers import is_run_threshold_reached
+from awx.conf.license import get_license
+
+logger = logging.getLogger('awx.main.tasks.host_metrics')
+
+
+@task(queue=get_task_queuename)
+def cleanup_host_metrics():
+    if is_run_threshold_reached(getattr(settings, 'CLEANUP_HOST_METRICS_LAST_TS', None), getattr(settings, 'CLEANUP_HOST_METRICS_INTERVAL', 30) * 86400):
+        logger.info(f"Executing cleanup_host_metrics, last ran at {getattr(settings, 'CLEANUP_HOST_METRICS_LAST_TS', '---')}")
+        HostMetricTask().cleanup(
+            soft_threshold=getattr(settings, 'CLEANUP_HOST_METRICS_SOFT_THRESHOLD', 12),
+            hard_threshold=getattr(settings, 'CLEANUP_HOST_METRICS_HARD_THRESHOLD', 36),
+        )
+        logger.info("Finished cleanup_host_metrics")
+
+
+@task(queue=get_task_queuename)
+def host_metric_summary_monthly():
+    """Run the host metric summary monthly task (by default at most once per week)"""
+    if is_run_threshold_reached(getattr(settings, 'HOST_METRIC_SUMMARY_TASK_LAST_TS', None), getattr(settings, 'HOST_METRIC_SUMMARY_TASK_INTERVAL', 7) * 86400):
+        logger.info(f"Executing host_metric_summary_monthly, last ran at {getattr(settings, 'HOST_METRIC_SUMMARY_TASK_LAST_TS', '---')}")
+        HostMetricSummaryMonthlyTask().execute()
+        logger.info("Finished host_metric_summary_monthly")
+
+
+class HostMetricTask:
+    """
+    This class provides cleanup task for HostMetric model.
+    There are two modes:
+    - soft cleanup (updates columns deleted, deleted_counter and last_deleted)
+    - hard cleanup (deletes from the db)
+    """
+
+    def cleanup(self, soft_threshold=None, hard_threshold=None):
+        """
+        Main entrypoint, runs either soft cleanup, hard cleanup or both
+
+        :param soft_threshold: (int)
+        :param hard_threshold: (int)
+        """
+        if hard_threshold is not None:
+            self.hard_cleanup(hard_threshold)
+        if soft_threshold is not None:
+            self.soft_cleanup(soft_threshold)
+
+        settings.CLEANUP_HOST_METRICS_LAST_TS = now()
+
+    @staticmethod
+    def soft_cleanup(threshold=None):
+        if threshold is None:
+            threshold = getattr(settings, 'CLEANUP_HOST_METRICS_SOFT_THRESHOLD', 12)
+
+        try:
+            threshold = int(threshold)
+        except (ValueError, TypeError) as e:
+            raise type(e)("soft_threshold has to be convertible to number") from e
+
+        last_automation_before = now() - relativedelta(months=threshold)
+        rows = HostMetric.active_objects.filter(last_automation__lt=last_automation_before).update(
+            deleted=True, deleted_counter=F('deleted_counter') + 1, last_deleted=now()
+        )
+        logger.info(f'cleanup_host_metrics: soft-deleted records last automated before {last_automation_before}, affected rows: {rows}')
+
+    @staticmethod
+    def hard_cleanup(threshold=None):
+        if threshold is None:
+            threshold = getattr(settings, 'CLEANUP_HOST_METRICS_HARD_THRESHOLD', 36)
+
+        try:
+            threshold = int(threshold)
+        except (ValueError, TypeError) as e:
+            raise type(e)("hard_threshold has to be convertible to number") from e
+
+        last_deleted_before = now() - relativedelta(months=threshold)
+        queryset = HostMetric.objects.filter(deleted=True, last_deleted__lt=last_deleted_before)
+        rows = queryset.delete()
+        logger.info(f'cleanup_host_metrics: hard-deleted records which were soft deleted before {last_deleted_before}, affected rows: {rows[0]}')
+
+
+class HostMetricSummaryMonthlyTask:
+    """
+    This task computes last [threshold] months of HostMetricSummaryMonthly table
+    [threshold] is setting CLEANUP_HOST_METRICS_HARD_THRESHOLD
+    Each record in the table represents changes in HostMetric table in one month
+    It always overrides all the months newer than <threshold>, never updates older months
+    Algorithm:
+    - hosts_added are HostMetric records with first_automation in given month
+    - hosts_deleted are HostMetric records with deleted=True and last_deleted in given month
+    - - HostMetrics soft-deleted before <threshold> also increases hosts_deleted in their last_deleted month
+    - license_consumed is license_consumed(previous month) + hosts_added - hosts_deleted
+    - - license_consumed for HostMetricSummaryMonthly.date < [threshold] is computed also from
+        all HostMetrics.first_automation < [threshold]
+    - license_capacity is set only for current month, and it's never updated (value taken from current subscription)
+    """
+
+    def __init__(self):
+        self.host_metrics = {}
+        self.processed_month = self._get_first_month()
+        self.existing_summaries = None
+        self.existing_summaries_idx = 0
+        self.existing_summaries_cnt = 0
+        self.records_to_create = []
+        self.records_to_update = []
+
+    def execute(self):
+        self._load_existing_summaries()
+        self._load_hosts_added()
+        self._load_hosts_deleted()
+
+        # Get first month after last hard delete
+        month = self._get_first_month()
+        license_consumed = self._get_license_consumed_before(month)
+
+        # Fill record for each month
+        while month <= datetime.date.today().replace(day=1):
+            summary = self._find_or_create_summary(month)
+            # Update summary and update license_consumed by hosts added/removed this month
+            self._update_summary(summary, month, license_consumed)
+            license_consumed = summary.license_consumed
+
+            month = month + relativedelta(months=1)
+
+        # Create/Update stats
+        HostMetricSummaryMonthly.objects.bulk_create(self.records_to_create, batch_size=1000)
+        HostMetricSummaryMonthly.objects.bulk_update(self.records_to_update, ['license_consumed', 'hosts_added', 'hosts_deleted'], batch_size=1000)
+
+        # Set timestamp of last run
+        settings.HOST_METRIC_SUMMARY_TASK_LAST_TS = now()
+
+    def _get_license_consumed_before(self, month):
+        license_consumed = 0
+        for metric_month, metric in self.host_metrics.items():
+            if metric_month < month:
+                hosts_added = metric.get('hosts_added', 0)
+                hosts_deleted = metric.get('hosts_deleted', 0)
+                license_consumed = license_consumed + hosts_added - hosts_deleted
+            else:
+                break
+        return license_consumed
+
+    def _load_existing_summaries(self):
+        """Find all summaries newer than host metrics delete threshold"""
+        self.existing_summaries = HostMetricSummaryMonthly.objects.filter(date__gte=self._get_first_month()).order_by('date')
+        self.existing_summaries_idx = 0
+        self.existing_summaries_cnt = len(self.existing_summaries)
+
+    def _load_hosts_added(self):
+        """Aggregates hosts added each month, by the 'first_automation' timestamp"""
+        #
+        # -- SQL translation (for better code readability)
+        # SELECT date_trunc('month', first_automation) as month,
+        #        count(first_automation) AS hosts_added
+        # FROM main_hostmetric
+        # GROUP BY month
+        # ORDER by month;
+        result = (
+            HostMetric.objects.annotate(month=TruncMonth('first_automation'))
+            .values('month')
+            .annotate(hosts_added=Count('first_automation'))
+            .values('month', 'hosts_added')
+            .order_by('month')
+        )
+
+        for host_metric in list(result):
+            month = host_metric['month']
+            if month:
+                beginning_of_month = datetime.date(month.year, month.month, 1)
+                if self.host_metrics.get(beginning_of_month) is None:
+                    self.host_metrics[beginning_of_month] = {}
+                self.host_metrics[beginning_of_month]['hosts_added'] = host_metric['hosts_added']
+
+    def _load_hosts_deleted(self):
+        """
+        Aggregates hosts deleted each month, by the 'last_deleted' timestamp.
+        Only host metrics that are currently flagged deleted=True are counted as deleted in a past month
+        (by intention - statistics can change retrospectively by re-automation of previously deleted host)
+        """
+        #
+        # -- SQL translation (for better code readability)
+        # SELECT date_trunc('month', last_deleted) as month,
+        #       count(last_deleted) AS hosts_deleted
+        # FROM main_hostmetric
+        # WHERE deleted = True
+        # GROUP BY 1 # equal to "GROUP BY month"
+        # ORDER by month;
+        result = (
+            HostMetric.objects.annotate(month=TruncMonth('last_deleted'))
+            .values('month')
+            .annotate(hosts_deleted=Count('last_deleted'))
+            .values('month', 'hosts_deleted')
+            .filter(deleted=True)
+            .order_by('month')
+        )
+        for host_metric in list(result):
+            month = host_metric['month']
+            if month:
+                beginning_of_month = datetime.date(month.year, month.month, 1)
+                if self.host_metrics.get(beginning_of_month) is None:
+                    self.host_metrics[beginning_of_month] = {}
+                self.host_metrics[beginning_of_month]['hosts_deleted'] = host_metric['hosts_deleted']
+
+    def _find_or_create_summary(self, month):
+        summary = self._find_summary(month)
+
+        if not summary:
+            summary = HostMetricSummaryMonthly(date=month)
+            self.records_to_create.append(summary)
+        else:
+            self.records_to_update.append(summary)
+        return summary
+
+    def _find_summary(self, month):
+        """
+        Existing summaries are ordered by month ASC.
+        This method is called with month in ascending order too => a single traversal is enough
+        """
+        summary = None
+        while not summary and self.existing_summaries_idx < self.existing_summaries_cnt:
+            tmp = self.existing_summaries[self.existing_summaries_idx]
+            if tmp.date < month:
+                self.existing_summaries_idx += 1
+            elif tmp.date == month:
+                summary = tmp
+            elif tmp.date > month:
+                break
+        return summary
+
+    def _update_summary(self, summary, month, license_consumed):
+        """Updates the metric with hosts added and deleted and sets license info for current month"""
+        # Get month counts from host metrics, zero if not found
+        hosts_added, hosts_deleted = 0, 0
+        if metric := self.host_metrics.get(month, None):
+            hosts_added = metric.get('hosts_added', 0)
+            hosts_deleted = metric.get('hosts_deleted', 0)
+
+        summary.license_consumed = license_consumed + hosts_added - hosts_deleted
+        summary.hosts_added = hosts_added
+        summary.hosts_deleted = hosts_deleted
+
+        # Set subscription count for current month
+        if month == datetime.date.today().replace(day=1):
+            license_info = get_license()
+            summary.license_capacity = license_info.get('instance_count', 0)
+        return summary
+
+    @staticmethod
+    def _get_first_month():
+        """Returns first month after host metrics hard delete threshold"""
+        threshold = getattr(settings, 'CLEANUP_HOST_METRICS_HARD_THRESHOLD', 36)
+        return datetime.date.today().replace(day=1) - relativedelta(months=int(threshold) - 1)
--- a/awx/main/tasks/jobs.py
+++ b/awx/main/tasks/jobs.py
@@ -74,6 +74,8 @@ from awx.main.utils.common import (
    extract_ansible_vars,
    get_awx_version,
    create_partition,
+    ScheduleWorkflowManager,
+    ScheduleTaskManager,
 )
 from awx.conf.license import get_license
 from awx.main.utils.handlers import SpecialInventoryHandler
@@ -112,7 +114,7 @@ class BaseTask(object):

    def __init__(self):
        self.cleanup_paths = []
-        self.update_attempts = int(settings.DISPATCHER_DB_DOWNTOWN_TOLLERANCE / 5)
+        self.update_attempts = int(settings.DISPATCHER_DB_DOWNTIME_TOLERANCE / 5)
        self.runner_callback = self.callback_class(model=self.model)

    def update_model(self, pk, _attempt=0, **updates):
@@ -450,6 +452,12 @@ class BaseTask(object):
                instance.ansible_version = ansible_version_info
                instance.save(update_fields=['ansible_version'])

+        # Run task manager appropriately for speculative dependencies
+        if instance.unifiedjob_blocked_jobs.exists():
+            ScheduleTaskManager().schedule()
+        if instance.spawned_by_workflow:
+            ScheduleWorkflowManager().schedule()
+
    def should_use_fact_cache(self):
        return False

@@ -1094,7 +1102,7 @@ class RunJob(SourceControlMixin, BaseTask):
            # actual `run()` call; this _usually_ means something failed in
            # the pre_run_hook method
            return
-        if self.should_use_fact_cache():
+        if self.should_use_fact_cache() and self.runner_callback.artifacts_processed:
            job.log_lifecycle("finish_job_fact_cache")
            finish_fact_cache(
                job.get_hosts_for_fact_cache(),
@@ -1873,6 +1881,8 @@ class RunSystemJob(BaseTask):
            if system_job.job_type in ('cleanup_jobs', 'cleanup_activitystream'):
                if 'days' in json_vars:
                    args.extend(['--days', str(json_vars.get('days', 60))])
+                if 'batch_size' in json_vars:
+                    args.extend(['--batch-size', str(json_vars['batch_size'])])
                if 'dry_run' in json_vars and json_vars['dry_run']:
                    args.extend(['--dry-run'])
            if system_job.job_type == 'cleanup_jobs':
--- a/awx/main/tasks/receptor.py
+++ b/awx/main/tasks/receptor.py
@@ -30,6 +30,7 @@ from awx.main.tasks.signals import signal_state, signal_callback, SignalExit
 from awx.main.models import Instance, InstanceLink, UnifiedJob
 from awx.main.dispatch import get_task_queuename
 from awx.main.dispatch.publish import task
+from awx.main.utils.pglock import advisory_lock

 # Receptorctl
 from receptorctl.socket_interface import ReceptorControl
@@ -431,16 +432,16 @@ class AWXReceptorJob:
                        # massive, only ask for last 1000 bytes
                        startpos = max(stdout_size - 1000, 0)
                        resultsock, resultfile = receptor_ctl.get_work_results(self.unit_id, startpos=startpos, return_socket=True, return_sockfile=True)
-                        resultsock.setblocking(False)  # this makes resultfile reads non blocking
                        lines = resultfile.readlines()
                        receptor_output = b"".join(lines).decode()
                    if receptor_output:
-                        self.task.runner_callback.delay_update(result_traceback=receptor_output)
+                        self.task.runner_callback.delay_update(result_traceback=f'Worker output:\n{receptor_output}')
                    elif detail:
-                        self.task.runner_callback.delay_update(result_traceback=detail)
+                        self.task.runner_callback.delay_update(result_traceback=f'Receptor detail:\n{detail}')
                    else:
                        logger.warning(f'No result details or output from {self.task.instance.log_format}, status:\n{state_name}')
                except Exception:
+                    logger.exception(f'Work results error from job id={self.task.instance.id} work_unit={self.task.instance.work_unit_id}')
                    raise RuntimeError(detail)

        return res
@@ -464,6 +465,7 @@ class AWXReceptorJob:
            event_handler=self.task.runner_callback.event_handler,
            finished_callback=self.task.runner_callback.finished_callback,
            status_handler=self.task.runner_callback.status_handler,
+            artifacts_handler=self.task.runner_callback.artifacts_handler,
            **self.runner_params,
        )

@@ -639,7 +641,7 @@ class AWXReceptorJob:
 #
 RECEPTOR_CONFIG_STARTER = (
    {'local-only': None},
-    {'log-level': 'info'},
+    {'log-level': settings.RECEPTOR_LOG_LEVEL},
    {'node': {'firewallrules': [{'action': 'reject', 'tonode': settings.CLUSTER_HOST_ID, 'toservice': 'control'}]}},
    {'control-service': {'service': 'control', 'filename': '/var/run/receptor/receptor.sock', 'permissions': '0660'}},
    {'work-command': {'worktype': 'local', 'command': 'ansible-runner', 'params': 'worker', 'allowruntimeparams': True}},
@@ -674,26 +676,41 @@ RECEPTOR_CONFIG_STARTER = (
 )


-@task()
-def write_receptor_config():
-    lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
-    with lock:
-        receptor_config = list(RECEPTOR_CONFIG_STARTER)
+def should_update_config(instances):
+    '''
+    checks that the list of instances matches the list of
+    tcp-peers in the config
+    '''
+    current_config = read_receptor_config()  # this gets receptor conf lock
+    current_peers = []
+    for config_entry in current_config:
+        for key, value in config_entry.items():
+            if key.endswith('-peer'):
+                current_peers.append(value['address'])
+    intended_peers = [f"{i.hostname}:{i.listener_port}" for i in instances]
+    logger.debug(f"Peers current {current_peers} intended {intended_peers}")
+    if set(current_peers) == set(intended_peers):
+        return False  # config file is already up to date

-        this_inst = Instance.objects.me()
-        instances = Instance.objects.filter(node_type=Instance.Types.EXECUTION)
-        existing_peers = {link.target_id for link in InstanceLink.objects.filter(source=this_inst)}
-        new_links = []
-        for instance in instances:
-            peer = {'tcp-peer': {'address': f'{instance.hostname}:{instance.listener_port}', 'tls': 'tlsclient'}}
-            receptor_config.append(peer)
-            if instance.id not in existing_peers:
-                new_links.append(InstanceLink(source=this_inst, target=instance, link_state=InstanceLink.States.ADDING))
+    return True

-        InstanceLink.objects.bulk_create(new_links)

-        with open(__RECEPTOR_CONF, 'w') as file:
-            yaml.dump(receptor_config, file, default_flow_style=False)
+def generate_config_data():
+    # returns two values
+    #   receptor config - based on current database peers
+    #   should_update   - If True, receptor_config differs from the receptor conf file on disk
+    instances = Instance.objects.filter(node_type__in=(Instance.Types.EXECUTION, Instance.Types.HOP), peers_from_control_nodes=True)
+
+    receptor_config = list(RECEPTOR_CONFIG_STARTER)
+    for instance in instances:
+        peer = {'tcp-peer': {'address': f'{instance.hostname}:{instance.listener_port}', 'tls': 'tlsclient'}}
+        receptor_config.append(peer)
+    should_update = should_update_config(instances)
+    return receptor_config, should_update
+
+
+def reload_receptor():
+    logger.warning("Receptor config changed, reloading receptor")

    # This needs to be outside of the lock because this function itself will acquire the lock.
    receptor_ctl = get_receptor_ctl()
@@ -709,8 +726,29 @@ def write_receptor_config():
    else:
        raise RuntimeError("Receptor reload failed")

-    links = InstanceLink.objects.filter(source=this_inst, target__in=instances, link_state=InstanceLink.States.ADDING)
-    links.update(link_state=InstanceLink.States.ESTABLISHED)
+
+@task()
+def write_receptor_config():
+    """
+    This task runs async on each control node, K8S only.
+    It is triggered whenever remote is added or removed, or if peers_from_control_nodes
+    is flipped.
+    It is possible for write_receptor_config to be called multiple times.
+    For example, if new instances are added in quick succession.
+    To prevent that case, each control node first grabs a DB advisory lock, specific
+    to just that control node (i.e. multiple control nodes can run this function
+    at the same time, since it only writes the local receptor config file)
+    """
+    with advisory_lock(f"{settings.CLUSTER_HOST_ID}_write_receptor_config", wait=True):
+        # Config file needs to be updated
+        receptor_config, should_update = generate_config_data()
+        if should_update:
+            lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
+            with lock:
+                with open(__RECEPTOR_CONF, 'w') as file:
+                    yaml.dump(receptor_config, file, default_flow_style=False)
+
+            reload_receptor()


@task(queue=get_task_queuename)
@@ -730,6 +768,3 @@ def remove_deprovisioned_node(hostname):

    # This will as a side effect also delete the InstanceLinks that are tied to it.
    Instance.objects.filter(hostname=hostname).delete()
-
-    # Update the receptor configs for all of the control-plane.
-    write_receptor_config.apply_async(queue='tower_broadcast_all')
--- a/awx/main/tasks/signals.py
+++ b/awx/main/tasks/signals.py
@@ -16,7 +16,9 @@ class SignalExit(Exception):
 class SignalState:
    def reset(self):
        self.sigterm_flag = False
-        self.is_active = False
+        self.sigint_flag = False
+
+        self.is_active = False  # for nested context managers
        self.original_sigterm = None
        self.original_sigint = None
        self.raise_exception = False
@@ -24,23 +26,36 @@ class SignalState:
    def __init__(self):
        self.reset()

-    def set_flag(self, *args):
-        """Method to pass into the python signal.signal method to receive signals"""
-        self.sigterm_flag = True
+    def raise_if_needed(self):
        if self.raise_exception:
            self.raise_exception = False  # so it is not raised a second time in error handling
            raise SignalExit()

+    def set_sigterm_flag(self, *args):
+        self.sigterm_flag = True
+        self.raise_if_needed()
+
+    def set_sigint_flag(self, *args):
+        self.sigint_flag = True
+        self.raise_if_needed()
+
    def connect_signals(self):
        self.original_sigterm = signal.getsignal(signal.SIGTERM)
        self.original_sigint = signal.getsignal(signal.SIGINT)
-        signal.signal(signal.SIGTERM, self.set_flag)
-        signal.signal(signal.SIGINT, self.set_flag)
+        signal.signal(signal.SIGTERM, self.set_sigterm_flag)
+        signal.signal(signal.SIGINT, self.set_sigint_flag)
        self.is_active = True

    def restore_signals(self):
        signal.signal(signal.SIGTERM, self.original_sigterm)
        signal.signal(signal.SIGINT, self.original_sigint)
+        # if we got a signal while context manager was active, call parent methods.
+        if self.sigterm_flag:
+            if callable(self.original_sigterm):
+                self.original_sigterm()
+        if self.sigint_flag:
+            if callable(self.original_sigint):
+                self.original_sigint()
        self.reset()


@@ -48,7 +63,7 @@ signal_state = SignalState()


 def signal_callback():
-    return signal_state.sigterm_flag
+    return bool(signal_state.sigterm_flag or signal_state.sigint_flag)


 def with_signal_handling(f):
--- a/awx/main/tasks/system.py
+++ b/awx/main/tasks/system.py
@@ -2,6 +2,7 @@
 from collections import namedtuple
 import functools
 import importlib
+import itertools
 import json
 import logging
 import os
@@ -14,7 +15,7 @@ from datetime import datetime

 # Django
 from django.conf import settings
-from django.db import transaction, DatabaseError, IntegrityError
+from django.db import connection, transaction, DatabaseError, IntegrityError
 from django.db.models.fields.related import ForeignKey
 from django.utils.timezone import now, timedelta
 from django.utils.encoding import smart_str
@@ -47,21 +48,16 @@ from awx.main.models import (
    Inventory,
    SmartInventoryMembership,
    Job,
-    HostMetric,
+    convert_jsonfields,
 )
 from awx.main.constants import ACTIVE_STATES
 from awx.main.dispatch.publish import task
 from awx.main.dispatch import get_task_queuename, reaper
-from awx.main.utils.common import (
-    get_type_for_model,
-    ignore_inventory_computed_fields,
-    ignore_inventory_group_removal,
-    ScheduleWorkflowManager,
-    ScheduleTaskManager,
-)
+from awx.main.utils.common import ignore_inventory_computed_fields, ignore_inventory_group_removal

 from awx.main.utils.reload import stop_local_services
 from awx.main.utils.pglock import advisory_lock
+from awx.main.tasks.helpers import is_run_threshold_reached
 from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper, write_receptor_config
 from awx.main.consumers import emit_channel_notification
 from awx.main import analytics
@@ -86,6 +82,11 @@ def dispatch_startup():
    if settings.IS_K8S:
        write_receptor_config()

+    try:
+        convert_jsonfields()
+    except Exception:
+        logger.exception("Failed json field conversion, skipping.")
+
    startup_logger.debug("Syncing Schedules")
    for sch in Schedule.objects.all():
        try:
@@ -129,6 +130,52 @@ def inform_cluster_of_shutdown():
        logger.exception('Encountered problem with normal shutdown signal.')


+@task(queue=get_task_queuename)
+def migrate_jsonfield(table, pkfield, columns):
+    batchsize = 10000
+    with advisory_lock(f'json_migration_{table}', wait=False) as acquired:
+        if not acquired:
+            return
+
+        from django.db.migrations.executor import MigrationExecutor
+
+        # If Django is currently running migrations, wait until it is done.
+        while True:
+            executor = MigrationExecutor(connection)
+            if not executor.migration_plan(executor.loader.graph.leaf_nodes()):
+                break
+            time.sleep(120)
+
+        logger.warning(f"Migrating json fields for {table}: {', '.join(columns)}")
+
+        with connection.cursor() as cursor:
+            for i in itertools.count(0, batchsize):
+                # Are there even any rows in the table beyond this point?
+                cursor.execute(f"select count(1) from {table} where {pkfield} >= %s limit 1;", (i,))
+                if not cursor.fetchone()[0]:
+                    break
+
+                column_expr = ', '.join(f"{colname} = {colname}_old::jsonb" for colname in columns)
+                # If any of the old columns have non-null values, the data needs to be cast and copied over.
+                empty_expr = ' or '.join(f"{colname}_old is not null" for colname in columns)
+                cursor.execute(  # Only clobber the new fields if there is non-null data in the old ones.
+                    f"""
+                    update {table}
+                      set {column_expr}
+                      where {pkfield} >= %s and {pkfield} < %s
+                        and {empty_expr};
+                    """,
+                    (i, i + batchsize),
+                )
+                rows = cursor.rowcount
+                logger.debug(f"Batch {i} to {i + batchsize} copied on {table}, {rows} rows affected.")
+
+            column_expr = ', '.join(f"DROP COLUMN {column}_old" for column in columns)
+            cursor.execute(f"ALTER TABLE {table} {column_expr};")
+
+        logger.warning(f"Migration of {table} to jsonb is finished.")
+
+
@task(queue=get_task_queuename)
 def apply_cluster_membership_policies():
    from awx.main.signals import disable_activity_stream
@@ -315,14 +362,7 @@ def send_notifications(notification_list, job_id=None):

@task(queue=get_task_queuename)
 def gather_analytics():
-    from awx.conf.models import Setting
-    from rest_framework.fields import DateTimeField
-
-    last_gather = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_GATHER').first()
-    last_time = DateTimeField().to_internal_value(last_gather.value) if last_gather and last_gather.value else None
-    gather_time = now()
-
-    if not last_time or ((gather_time - last_time).total_seconds() > settings.AUTOMATION_ANALYTICS_GATHER_INTERVAL):
+    if is_run_threshold_reached(getattr(settings, 'AUTOMATION_ANALYTICS_LAST_GATHER', None), settings.AUTOMATION_ANALYTICS_GATHER_INTERVAL):
        analytics.gather()


@@ -379,20 +419,6 @@ def cleanup_images_and_files():
    _cleanup_images_and_files()


-@task(queue=get_task_queuename)
-def cleanup_host_metrics():
-    from awx.conf.models import Setting
-    from rest_framework.fields import DateTimeField
-
-    last_cleanup = Setting.objects.filter(key='CLEANUP_HOST_METRICS_LAST_TS').first()
-    last_time = DateTimeField().to_internal_value(last_cleanup.value) if last_cleanup and last_cleanup.value else None
-
-    cleanup_interval_secs = getattr(settings, 'CLEANUP_HOST_METRICS_INTERVAL', 30) * 86400
-    if not last_time or ((now() - last_time).total_seconds() > cleanup_interval_secs):
-        months_ago = getattr(settings, 'CLEANUP_HOST_METRICS_THRESHOLD', 12)
-        HostMetric.cleanup_task(months_ago)
-
-
@task(queue=get_task_queuename)
 def cluster_node_health_check(node):
    """
@@ -434,7 +460,6 @@ def execution_node_health_check(node):
    data = worker_info(node)

    prior_capacity = instance.capacity
-
    instance.save_health_data(
        version='ansible-runner-' + data.get('runner_version', '???'),
        cpu=data.get('cpu_count', 0),
@@ -455,13 +480,37 @@ def execution_node_health_check(node):
    return data


-def inspect_execution_nodes(instance_list):
-    with advisory_lock('inspect_execution_nodes_lock', wait=False):
-        node_lookup = {inst.hostname: inst for inst in instance_list}
+def inspect_established_receptor_connections(mesh_status):
+    '''
+    Flips link state from ADDING to ESTABLISHED
+    If the InstanceLink source and target match the entries
+    in Known Connection Costs, flip to Established.
+    '''
+    from awx.main.models import InstanceLink

+    all_links = InstanceLink.objects.filter(link_state=InstanceLink.States.ADDING)
+    if not all_links.exists():
+        return
+    active_receptor_conns = mesh_status['KnownConnectionCosts']
+    update_links = []
+    for link in all_links:
+        if link.link_state != InstanceLink.States.REMOVING:
+            if link.target.hostname in active_receptor_conns.get(link.source.hostname, {}):
+                if link.link_state is not InstanceLink.States.ESTABLISHED:
+                    link.link_state = InstanceLink.States.ESTABLISHED
+                    update_links.append(link)
+
+    InstanceLink.objects.bulk_update(update_links, ['link_state'])
+
+
+def inspect_execution_and_hop_nodes(instance_list):
+    with advisory_lock('inspect_execution_and_hop_nodes_lock', wait=False):
+        node_lookup = {inst.hostname: inst for inst in instance_list}
        ctl = get_receptor_ctl()
        mesh_status = ctl.simple_command('status')

+        inspect_established_receptor_connections(mesh_status)
+
        nowtime = now()
        workers = mesh_status['Advertisements']

@@ -519,7 +568,7 @@ def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
            this_inst = inst
            break

-    inspect_execution_nodes(instance_list)
+    inspect_execution_and_hop_nodes(instance_list)

    for inst in list(instance_list):
        if inst == this_inst:
@@ -708,66 +757,21 @@ def awx_periodic_scheduler():
                new_unified_job.save(update_fields=['status', 'job_explanation'])
                new_unified_job.websocket_emit_status("failed")
            emit_channel_notification('schedules-changed', dict(id=schedule.id, group_name="schedules"))
-        state.save()
-
-
-def schedule_manager_success_or_error(instance):
-    if instance.unifiedjob_blocked_jobs.exists():
-        ScheduleTaskManager().schedule()
-    if instance.spawned_by_workflow:
-        ScheduleWorkflowManager().schedule()


@task(queue=get_task_queuename)
-def handle_work_success(task_actual):
-    try:
-        instance = UnifiedJob.get_instance_by_type(task_actual['type'], task_actual['id'])
-    except ObjectDoesNotExist:
-        logger.warning('Missing {} `{}` in success callback.'.format(task_actual['type'], task_actual['id']))
-        return
-    if not instance:
-        return
-    schedule_manager_success_or_error(instance)
-
-
-@task(queue=get_task_queuename)
-def handle_work_error(task_actual):
-    try:
-        instance = UnifiedJob.get_instance_by_type(task_actual['type'], task_actual['id'])
-    except ObjectDoesNotExist:
-        logger.warning('Missing {} `{}` in error callback.'.format(task_actual['type'], task_actual['id']))
-        return
-    if not instance:
-        return
-
-    subtasks = instance.get_jobs_fail_chain()  # reverse of dependent_jobs mostly
-    logger.debug(f'Executing error task id {task_actual["id"]}, subtasks: {[subtask.id for subtask in subtasks]}')
-
-    deps_of_deps = {}
-
-    for subtask in subtasks:
-        if subtask.celery_task_id != instance.celery_task_id and not subtask.cancel_flag and not subtask.status in ('successful', 'failed'):
-            # If there are multiple in the dependency chain, A->B->C, and this was called for A, blame B for clarity
-            blame_job = deps_of_deps.get(subtask.id, instance)
-            subtask.status = 'failed'
-            subtask.failed = True
-            if not subtask.job_explanation:
-                subtask.job_explanation = 'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (
-                    get_type_for_model(type(blame_job)),
-                    blame_job.name,
-                    blame_job.id,
-                )
-            subtask.save()
-            subtask.websocket_emit_status("failed")
-
-            for sub_subtask in subtask.get_jobs_fail_chain():
-                deps_of_deps[sub_subtask.id] = subtask
-
-    # We only send 1 job complete message since all the job completion message
-    # handling does is trigger the scheduler. If we extend the functionality of
-    # what the job complete message handler does then we may want to send a
-    # completion event for each job here.
-    schedule_manager_success_or_error(instance)
+def handle_failure_notifications(task_ids):
+    """Send the 'failed' notification templates for each unified job in task_ids.
+
+    An exception raised while notifying for one job is logged and does not stop
+    the remaining jobs. Ids without a matching UnifiedJob row are reported in a
+    single warning after the loop.
+    """
+    seen_ids = set()
+    for instance in UnifiedJob.objects.filter(id__in=task_ids):
+        seen_ids.add(instance.id)
+        try:
+            instance.send_notification_templates('failed')
+        except Exception:
+            logger.exception(f'Error preparing notifications for task {instance.id}')
+
+    deleted_tasks = set(task_ids).difference(seen_ids)
+    if not deleted_tasks:
+        return
+    logger.warning(f'Could not send notifications for {deleted_tasks} because they were not found in the database')


@task(queue=get_task_queuename)
@@ -839,10 +843,7 @@ def delete_inventory(inventory_id, user_id, retries=5):
            user = None
    with ignore_inventory_computed_fields(), ignore_inventory_group_removal(), impersonate(user):
        try:
-            i = Inventory.objects.get(id=inventory_id)
-            for host in i.hosts.iterator():
-                host.job_events_as_primary_host.update(host=None)
-            i.delete()
+            Inventory.objects.get(id=inventory_id).delete()
            emit_channel_notification('inventories-status_changed', {'group_name': 'inventories', 'inventory_id': inventory_id, 'status': 'deleted'})
            logger.debug('Deleted inventory {} as user {}.'.format(inventory_id, user_id))
        except Inventory.DoesNotExist:
--- a/awx/main/tests/factories/fixtures.py
+++ b/awx/main/tests/factories/fixtures.py
@@ -1,6 +1,9 @@
 import json

 from django.contrib.auth.models import User
+from django.core.exceptions import ValidationError
+
+from unittest import mock

 from awx.main.models import (
    Organization,
@@ -20,6 +23,7 @@ from awx.main.models import (
    WorkflowJobNode,
    WorkflowJobTemplateNode,
 )
+from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly

 # mk methods should create only a single object of a single type.
 # they should also have the option of being persisted or not.
@@ -248,3 +252,42 @@ def mk_workflow_job_node(unified_job_template=None, success_nodes=None, failure_
    if persisted:
        workflow_node.save()
    return workflow_node
+
+
+def mk_host_metric(hostname, first_automation, last_automation=None, last_deleted=None, deleted=False, persisted=True):
+    """Build a HostMetric, save it unless persisted is False, and return it.
+
+    When hostname is empty, a name of the form host-<first_automation>-<idx> is
+    generated and idx is incremented until validate_unique() accepts it.
+    django.utils.timezone.now is patched to return first_automation while the
+    object is built, validated and saved.
+
+    Raises ValidationError for any validation failure other than a hostname
+    clash on an auto-generated name.
+    """
+    idx = 1
+    while True:
+        try:
+            with mock.patch("django.utils.timezone.now") as mock_now:
+                mock_now.return_value = first_automation
+                metric = HostMetric(
+                    hostname=hostname or f"host-{first_automation}-{idx}",
+                    first_automation=first_automation,
+                    last_automation=last_automation or first_automation,
+                    last_deleted=last_deleted,
+                    deleted=deleted,
+                )
+                metric.validate_unique()
+                if persisted:
+                    metric.save()
+            # Hand the object back, like mk_host_metric_summary does.
+            return metric
+        except ValidationError as e:
+            # Repeat create for auto-generated hostname
+            if not hostname and e.message_dict.get('hostname', None):
+                idx += 1
+            else:
+                raise
+
+
+def mk_host_metric_summary(date, license_consumed=0, license_capacity=0, hosts_added=0, hosts_deleted=0, indirectly_managed_hosts=0, persisted=True):
+    """Build a HostMetricSummaryMonthly for date, save it unless persisted is False, and return it."""
+    field_values = {
+        'date': date,
+        'license_consumed': license_consumed,
+        'license_capacity': license_capacity,
+        'hosts_added': hosts_added,
+        'hosts_deleted': hosts_deleted,
+        'indirectly_managed_hosts': indirectly_managed_hosts,
+    }
+    summary = HostMetricSummaryMonthly(**field_values)
+    if not persisted:
+        return summary
+    summary.save()
+    return summary
--- a/awx/main/tests/functional/api/test_instance.py
+++ b/awx/main/tests/functional/api/test_instance.py
@@ -84,5 +84,6 @@ def test_custom_hostname_regex(post, admin_user):
                "hostname": value[0],
                "node_type": "execution",
                "node_state": "installed",
+                "peers": [],
            }
            post(url=url, user=admin_user, data=data, expect=value[1])
--- a/awx/main/tests/functional/api/test_instance_peers.py
+++ b/awx/main/tests/functional/api/test_instance_peers.py
@@ -0,0 +1,342 @@
+import pytest
+import yaml
+import itertools
+from unittest import mock
+
+from django.db.utils import IntegrityError
+
+from awx.api.versioning import reverse
+from awx.main.models import Instance
+from awx.api.views.instance_install_bundle import generate_group_vars_all_yml
+
+
+def has_peer(group_vars, peer):
+    """Return True when an entry of group_vars['receptor_peers'] formats as '<host>:<port>' equal to peer."""
+    return any(f"{p['host']}:{p['port']}" == peer for p in group_vars.get('receptor_peers', []))
+
+
+@pytest.mark.django_db
+class TestPeers:
+    @pytest.fixture(autouse=True)
+    def configure_settings(self, settings):
+        settings.IS_K8S = True
+
+    @pytest.mark.parametrize('node_type', ['control', 'hybrid'])
+    def test_prevent_peering_to_self(self, node_type):
+        """
+        cannot peer to self
+        """
+        control_instance = Instance.objects.create(hostname='abc', node_type=node_type)
+        with pytest.raises(IntegrityError):
+            control_instance.peers.add(control_instance)
+
+    @pytest.mark.parametrize('node_type', ['control', 'hybrid', 'hop', 'execution'])
+    def test_creating_node(self, node_type, admin_user, post):
+        """
+        can only add hop and execution nodes via API
+        """
+        post(
+            url=reverse('api:instance_list'),
+            data={"hostname": "abc", "node_type": node_type},
+            user=admin_user,
+            expect=400 if node_type in ['control', 'hybrid'] else 201,
+        )
+
+    def test_changing_node_type(self, admin_user, patch):
+        """
+        cannot change node type
+        """
+        hop = Instance.objects.create(hostname='abc', node_type="hop")
+        patch(
+            url=reverse('api:instance_detail', kwargs={'pk': hop.pk}),
+            data={"node_type": "execution"},
+            user=admin_user,
+            expect=400,
+        )
+
+    @pytest.mark.parametrize('node_type', ['hop', 'execution'])
+    def test_listener_port_null(self, node_type, admin_user, post):
+        """
+        listener_port can be None
+        """
+        post(
+            url=reverse('api:instance_list'),
+            data={"hostname": "abc", "node_type": node_type, "listener_port": None},
+            user=admin_user,
+            expect=201,
+        )
+
+    @pytest.mark.parametrize('node_type, allowed', [('control', False), ('hybrid', False), ('hop', True), ('execution', True)])
+    def test_peers_from_control_nodes_allowed(self, node_type, allowed, post, admin_user):
+        """
+        only hop and execution nodes can have peers_from_control_nodes set to True
+        """
+        post(
+            url=reverse('api:instance_list'),
+            data={"hostname": "abc", "peers_from_control_nodes": True, "node_type": node_type, "listener_port": 6789},
+            user=admin_user,
+            expect=201 if allowed else 400,
+        )
+
+    def test_listener_port_is_required(self, admin_user, post):
+        """
+        if adding instance to peers list, that instance must have listener_port set
+        """
+        Instance.objects.create(hostname='abc', node_type="hop", listener_port=None)
+        post(
+            url=reverse('api:instance_list'),
+            data={"hostname": "ex", "peers_from_control_nodes": False, "node_type": "execution", "listener_port": None, "peers": ["abc"]},
+            user=admin_user,
+            expect=400,
+        )
+
+    def test_peers_from_control_nodes_listener_port_enabled(self, admin_user, post):
+        """
+        if peers_from_control_nodes is True, listener_port must be an integer
+        Assert that all other combinations are allowed
+        """
+        # itertools.product yields all 8 (node_type, peers_from, listener_port) combinations;
+        # index keeps every posted hostname unique
+        for index, item in enumerate(itertools.product(['hop', 'execution'], [True, False], [None, 6789])):
+            node_type, peers_from, listener_port = item
+            # only disallowed case is when peers_from is True and listener port is None
+            disallowed = peers_from and not listener_port
+            post(
+                url=reverse('api:instance_list'),
+                data={"hostname": f"abc{index}", "peers_from_control_nodes": peers_from, "node_type": node_type, "listener_port": listener_port},
+                user=admin_user,
+                expect=400 if disallowed else 201,
+            )
+
+    @pytest.mark.parametrize('node_type', ['control', 'hybrid'])
+    def test_disallow_modifying_peers_control_nodes(self, node_type, admin_user, patch):
+        """
+        for control nodes, peers field should not be
+        modified directly via patch.
+
+        the peers of a control node follow the peers_from_control_nodes
+        flag of the other instances instead
+        """
+        control = Instance.objects.create(hostname='abc', node_type=node_type)
+        hop1 = Instance.objects.create(hostname='hop1', node_type='hop', peers_from_control_nodes=True, listener_port=6789)
+        hop2 = Instance.objects.create(hostname='hop2', node_type='hop', peers_from_control_nodes=False, listener_port=6789)
+        assert [hop1] == list(control.peers.all())  # only hop1 should be peered
+        patch(
+            url=reverse('api:instance_detail', kwargs={'pk': control.pk}),
+            data={"peers": ["hop2"]},
+            user=admin_user,
+            expect=400,  # cannot add peers directly
+        )
+        patch(
+            url=reverse('api:instance_detail', kwargs={'pk': control.pk}),
+            data={"peers": ["hop1"]},
+            user=admin_user,
+            expect=200,  # patching with current peers list should be okay
+        )
+        patch(
+            url=reverse('api:instance_detail', kwargs={'pk': control.pk}),
+            data={"peers": []},
+            user=admin_user,
+            expect=400,  # cannot remove peers directly
+        )
+        patch(
+            url=reverse('api:instance_detail', kwargs={'pk': control.pk}),
+            data={},
+            user=admin_user,
+            expect=200,  # patching without data should be fine too
+        )
+        # patch hop2
+        patch(
+            url=reverse('api:instance_detail', kwargs={'pk': hop2.pk}),
+            data={"peers_from_control_nodes": True},
+            user=admin_user,
+            expect=200,  # enabling peers_from_control_nodes on hop2 itself is allowed
+        )
+        assert {hop1, hop2} == set(control.peers.all())  # hop1 and hop2 should now be peered from control node
+
+    def test_disallow_changing_hostname(self, admin_user, patch):
+        """
+        cannot change hostname
+        """
+        hop = Instance.objects.create(hostname='hop', node_type='hop')
+        patch(
+            url=reverse('api:instance_detail', kwargs={'pk': hop.pk}),
+            data={"hostname": "hop2"},
+            user=admin_user,
+            expect=400,
+        )
+
+    def test_disallow_changing_node_state(self, admin_user, patch):
+        """
+        only allow setting to deprovisioning
+        """
+        hop = Instance.objects.create(hostname='hop', node_type='hop', node_state='installed')
+        patch(
+            url=reverse('api:instance_detail', kwargs={'pk': hop.pk}),
+            data={"node_state": "deprovisioning"},
+            user=admin_user,
+            expect=200,
+        )
+        patch(
+            url=reverse('api:instance_detail', kwargs={'pk': hop.pk}),
+            data={"node_state": "ready"},
+            user=admin_user,
+            expect=400,
+        )
+
+    @pytest.mark.parametrize('node_type', ['control', 'hybrid'])
+    def test_control_node_automatically_peers(self, node_type):
+        """
+        a new control node should automatically
+        peer to hop
+
+        peer to hop should be removed if hop is deleted
+        """
+
+        hop = Instance.objects.create(hostname='hop', node_type='hop', peers_from_control_nodes=True, listener_port=6789)
+        control = Instance.objects.create(hostname='abc', node_type=node_type)
+        assert hop in control.peers.all()
+        hop.delete()
+        assert not control.peers.exists()
+
+    @pytest.mark.parametrize('node_type', ['control', 'hybrid'])
+    def test_control_node_retains_other_peers(self, node_type):
+        """
+        if a new node comes online, other peer relationships should
+        remain intact
+        """
+        hop1 = Instance.objects.create(hostname='hop1', node_type='hop', listener_port=6789, peers_from_control_nodes=True)
+        hop2 = Instance.objects.create(hostname='hop2', node_type='hop', listener_port=6789, peers_from_control_nodes=False)
+        hop1.peers.add(hop2)
+
+        # a control node is added
+        Instance.objects.create(hostname='control', node_type=node_type, listener_port=None)
+
+        # hop1 must still peer to exactly hop2, not merely to "some" instance
+        assert [hop2] == list(hop1.peers.all())
+
+    def test_group_vars(self, get, admin_user):
+        """
+        control > hop1 > hop2 < execution
+        """
+        control = Instance.objects.create(hostname='control', node_type='control', listener_port=None)
+        hop1 = Instance.objects.create(hostname='hop1', node_type='hop', listener_port=6789, peers_from_control_nodes=True)
+        hop2 = Instance.objects.create(hostname='hop2', node_type='hop', listener_port=6789, peers_from_control_nodes=False)
+        execution = Instance.objects.create(hostname='execution', node_type='execution', listener_port=6789)
+
+        execution.peers.add(hop2)
+        hop1.peers.add(hop2)
+
+        control_vars = yaml.safe_load(generate_group_vars_all_yml(control))
+        hop1_vars = yaml.safe_load(generate_group_vars_all_yml(hop1))
+        hop2_vars = yaml.safe_load(generate_group_vars_all_yml(hop2))
+        execution_vars = yaml.safe_load(generate_group_vars_all_yml(execution))
+
+        # control group vars assertions
+        assert has_peer(control_vars, 'hop1:6789')
+        assert not has_peer(control_vars, 'hop2:6789')
+        assert not has_peer(control_vars, 'execution:6789')
+        assert not control_vars.get('receptor_listener', False)
+
+        # hop1 group vars assertions
+        assert has_peer(hop1_vars, 'hop2:6789')
+        assert not has_peer(hop1_vars, 'execution:6789')
+        assert hop1_vars.get('receptor_listener', False)
+
+        # hop2 group vars assertions
+        assert not has_peer(hop2_vars, 'hop1:6789')
+        assert not has_peer(hop2_vars, 'execution:6789')
+        assert hop2_vars.get('receptor_listener', False)
+        assert hop2_vars.get('receptor_peers', []) == []
+
+        # execution group vars assertions
+        assert has_peer(execution_vars, 'hop2:6789')
+        assert not has_peer(execution_vars, 'hop1:6789')
+        assert execution_vars.get('receptor_listener', False)
+
+    def test_write_receptor_config_called(self):
+        """
+        Assert that write_receptor_config is called
+        when certain instances are created, or if
+        peers_from_control_nodes changes.
+        In general, write_receptor_config should only
+        be called when necessary, as it will reload
+        receptor backend connections which is not trivial.
+        """
+        with mock.patch('awx.main.models.ha.schedule_write_receptor_config') as write_method:
+            # new control instance but nothing to peer to (no)
+            control = Instance.objects.create(hostname='control1', node_type='control')
+            write_method.assert_not_called()
+
+            # new hop node with peers_from_control_nodes False (no)
+            hop1 = Instance.objects.create(hostname='hop1', node_type='hop', listener_port=6789, peers_from_control_nodes=False)
+            hop1.delete()
+            write_method.assert_not_called()
+
+            # new hop node with peers_from_control_nodes True (yes)
+            hop1 = Instance.objects.create(hostname='hop1', node_type='hop', listener_port=6789, peers_from_control_nodes=True)
+            write_method.assert_called()
+            write_method.reset_mock()
+
+            # new control instance but with something to peer to (yes)
+            Instance.objects.create(hostname='control2', node_type='control')
+            write_method.assert_called()
+            write_method.reset_mock()
+
+            # new hop node with peers_from_control_nodes False and peered to another hop node (no)
+            hop2 = Instance.objects.create(hostname='hop2', node_type='hop', listener_port=6789, peers_from_control_nodes=False)
+            hop2.peers.add(hop1)
+            hop2.delete()
+            write_method.assert_not_called()
+
+            # changing peers_from_control_nodes to False (yes)
+            hop1.peers_from_control_nodes = False
+            hop1.save()
+            write_method.assert_called()
+            write_method.reset_mock()
+
+            # deleting hop node that has peers_from_control_nodes to False (no)
+            hop1.delete()
+            write_method.assert_not_called()
+
+            # deleting control nodes (no)
+            control.delete()
+            write_method.assert_not_called()
+
+    def test_write_receptor_config_data(self):
+        """
+        Check which peers end up in the data generated for receptor.conf:
+        instances created with peers_from_control_nodes=False must not
+        trigger an update, instances created with it set to True must
+        appear as tcp-peer addresses
+        """
+        from awx.main.tasks.receptor import RECEPTOR_CONFIG_STARTER
+
+        with mock.patch('awx.main.tasks.receptor.read_receptor_config', return_value=list(RECEPTOR_CONFIG_STARTER)):
+            from awx.main.tasks.receptor import generate_config_data
+
+            # before this test creates any instance
+            assert not generate_config_data()[1]
+
+            # not peered, so config file should not be updated
+            for i in range(3):
+                Instance.objects.create(hostname=f"exNo-{i}", node_type='execution', listener_port=6789, peers_from_control_nodes=False)
+            assert not generate_config_data()[1]
+
+            # peered, so config file should be updated
+            expected_peers = set()
+            for i in range(3):
+                expected_peers.add(f"hop-{i}:6789")
+                Instance.objects.create(hostname=f"hop-{i}", node_type='hop', listener_port=6789, peers_from_control_nodes=True)
+            for i in range(3):
+                expected_peers.add(f"exYes-{i}:6789")
+                Instance.objects.create(hostname=f"exYes-{i}", node_type='execution', listener_port=6789, peers_from_control_nodes=True)
+
+            new_config, should_update = generate_config_data()
+            assert should_update
+
+            # every config entry is a mapping; collect the address of each tcp-peer entry
+            peers = {entry["tcp-peer"]['address'] for entry in new_config if "tcp-peer" in entry}
+
+            assert expected_peers == peers
--- a/awx/main/tests/functional/commands/test_cleanup_host_metrics.py
+++ b/awx/main/tests/functional/commands/test_cleanup_host_metrics.py
@@ -0,0 +1,78 @@
+import pytest
+
+from awx.main.tasks.host_metrics import HostMetricTask
+from awx.main.models.inventory import HostMetric
+from awx.main.tests.factories.fixtures import mk_host_metric
+from dateutil.relativedelta import relativedelta
+from django.conf import settings
+from django.utils import timezone
+
+
+@pytest.mark.django_db
+def test_no_host_metrics():
+    """No-crash test"""
+    assert HostMetric.objects.count() == 0
+    HostMetricTask().cleanup(soft_threshold=0, hard_threshold=0)
+    HostMetricTask().cleanup(soft_threshold=24, hard_threshold=42)
+    assert HostMetric.objects.count() == 0
+
+
+@pytest.mark.django_db
+def test_delete_exception():
+    """Crash test"""
+    with pytest.raises(ValueError):
+        HostMetricTask().soft_cleanup("")
+    with pytest.raises(TypeError):
+        HostMetricTask().hard_cleanup(set())
+
+
+@pytest.mark.django_db
+@pytest.mark.parametrize('threshold', [settings.CLEANUP_HOST_METRICS_SOFT_THRESHOLD, 20])
+def test_soft_delete(threshold):
+    """Active metrics whose last_automation is older than the threshold end up with deleted=True.
+
+    No row is removed, and running the cleanup a second time gives the same result.
+    """
+    # (hostname, how long ago the last automation ran, deleted flag)
+    cases = [
+        ('host_1', dict(months=1), False),
+        ('host_2', dict(months=1), True),
+        ('host_3', dict(months=threshold, hours=-1), False),
+        ('host_4', dict(months=threshold, hours=-1), True),
+        ('host_5', dict(months=threshold, hours=1), False),
+        ('host_6', dict(months=threshold, hours=1), True),
+        ('host_7', dict(months=42), False),
+        ('host_8', dict(months=42), True),
+    ]
+    for hostname, offset, deleted in cases:
+        mk_host_metric(hostname, first_automation=ago(months=1), last_automation=ago(**offset), deleted=deleted)
+
+    assert HostMetric.objects.count() == 8
+    assert HostMetric.active_objects.count() == 4
+
+    for _ in range(2):
+        HostMetricTask().cleanup(soft_threshold=threshold)
+        assert HostMetric.objects.count() == 8
+
+        still_active = HostMetric.objects.filter(deleted=False).order_by('hostname')
+        assert set(still_active.values_list('hostname', flat=True)) == {'host_1', 'host_3'}
+
+
+@pytest.mark.django_db
+@pytest.mark.parametrize('threshold', [settings.CLEANUP_HOST_METRICS_HARD_THRESHOLD, 20])
+def test_hard_delete(threshold):
+    """Metrics with deleted=True and last_deleted older than the threshold are removed from the db.
+
+    Running the cleanup a second time gives the same result.
+    """
+    # (hostname, how long ago the metric was last deleted, deleted flag)
+    cases = [
+        ('host_1', dict(months=1), False),
+        ('host_2', dict(months=1), True),
+        ('host_3', dict(months=threshold, hours=-1), False),
+        ('host_4', dict(months=threshold, hours=-1), True),
+        ('host_5', dict(months=threshold, hours=1), False),
+        ('host_6', dict(months=threshold, hours=1), True),
+        ('host_7', dict(months=42), False),
+        ('host_8', dict(months=42), True),
+    ]
+    for hostname, offset, deleted in cases:
+        mk_host_metric(hostname, first_automation=ago(months=1), last_deleted=ago(**offset), deleted=deleted)
+
+    assert HostMetric.objects.count() == 8
+    assert HostMetric.active_objects.count() == 4
+
+    for _ in range(2):
+        HostMetricTask().cleanup(hard_threshold=threshold)
+        assert HostMetric.objects.count() == 6
+
+        remaining = HostMetric.objects.order_by('hostname')
+        assert set(remaining.values_list('hostname', flat=True)) == {'host_1', 'host_2', 'host_3', 'host_4', 'host_5', 'host_7'}
+
+
+def ago(months=0, hours=0):
+    """Return timezone.now() shifted back by the given months and hours (negative values shift forward)."""
+    current = timezone.now()
+    return current - relativedelta(months=months, hours=hours)
--- a/awx/main/tests/functional/commands/test_host_metric_summary_monthly.py
+++ b/awx/main/tests/functional/commands/test_host_metric_summary_monthly.py
@@ -0,0 +1,382 @@
+import pytest
+import datetime
+from dateutil.relativedelta import relativedelta
+from django.conf import settings
+from django.utils import timezone
+
+
+from awx.main.management.commands.host_metric_summary_monthly import Command
+from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly
+from awx.main.tests.factories.fixtures import mk_host_metric, mk_host_metric_summary
+
+
+@pytest.fixture
+def threshold():  # hard cleanup threshold, used below as a number of months; 36 when the setting is missing
+    return int(getattr(settings, 'CLEANUP_HOST_METRICS_HARD_THRESHOLD', 36))
+
+
+@pytest.mark.django_db
+@pytest.mark.parametrize("metrics_cnt", [0, 1, 2, 3])
+@pytest.mark.parametrize("mode", ["old_data", "actual_data", "all_data"])
+def test_summaries_counts(threshold, metrics_cnt, mode):
+    """The command keeps exactly `threshold` monthly summaries for any mix of host metrics and leaves older summaries alone"""
+    assert HostMetricSummaryMonthly.objects.count() == 0
+    for idx in range(metrics_cnt):  # independent checks, so "all_data" creates both an old and an actual metric
+        if mode in ("old_data", "all_data"):
+            mk_host_metric(None, months_ago(threshold + idx, "dt"))
+        if mode in ("actual_data", "all_data"):
+            mk_host_metric(None, months_ago(threshold - idx, "dt"))
+
+    Command().handle()
+
+    # Number of records is equal to host metrics' hard cleanup months
+    assert HostMetricSummaryMonthly.objects.count() == threshold
+
+    # Records start with the date in the month following the threshold month
+    date = months_ago(threshold - 1)
+    for metric in list(HostMetricSummaryMonthly.objects.order_by('date').all()):
+        assert metric.date == date
+        date += relativedelta(months=1)
+
+    # Older records are untouched
+    mk_host_metric_summary(date=months_ago(threshold + 10))
+    Command().handle()
+
+    assert HostMetricSummaryMonthly.objects.count() == threshold + 1
+
+
+@pytest.mark.django_db
+@pytest.mark.parametrize("mode", ["old_data", "actual_data", "all_data"])
+def test_summary_values(threshold, mode):
+    """Walk one tester through every scenario step and check the summaries after each command run"""
+    tester_classes = {"old_data": MetricsTesterOldData, "actual_data": MetricsTesterActualData, "all_data": MetricsTesterCombinedData}
+    tester = tester_classes[mode](threshold)
+
+    for step in ("create_metrics", "add_old_summaries", "change_metrics", "delete_metrics", "add_metrics"):
+        prepare = getattr(tester, step)  # scenario method looked up by name
+        verify = getattr(tester, f"assert_{step}")  # its matching assertion method
+        prepare()
+
+        for _ in range(2):  # the operation is idempotent, so run and verify twice
+            Command().handle()
+            verify()
+
+
+class MetricsTester:
+    """Base for the scenario testers: keeps the expected monthly summaries and checks them against the database"""
+
+    def __init__(self, threshold, ignore_asserts=False):
+        self.threshold = threshold
+        self.expected_summaries = {}
+        self.ignore_asserts = ignore_asserts
+
+    def add_old_summaries(self):
+        """Store three summaries dated before the threshold; no host metric corresponds to them"""
+        for months, consumed, added, deleted in ((4, 100, 10, 5), (3, 105, 20, 10), (2, 115, 60, 75)):
+            mk_host_metric_summary(self.below(months), license_consumed=consumed, hosts_added=added, hosts_deleted=deleted)
+
+    def assert_add_old_summaries(self):
+        """The summaries older than the threshold are expected to stay exactly as they were stored"""
+        for months, consumed, added, deleted in ((4, 100, 10, 5), (3, 105, 20, 10), (2, 115, 60, 75)):
+            day = self.below(months)
+            self.expected_summaries[day] = {"date": day, "license_consumed": consumed, "hosts_added": added, "hosts_deleted": deleted}
+
+        self.assert_host_metric_summaries()
+
+    def assert_host_metric_summaries(self):
+        """Check every stored summary against the expectation; skipped when this object only feeds the Combined test"""
+        if self.ignore_asserts:
+            return True
+
+        for summary in list(HostMetricSummaryMonthly.objects.order_by('date').all()):
+            expected = self.expected_summaries.get(summary.date)
+            assert expected is not None
+
+            assert expected == {
+                "date": summary.date,
+                "license_consumed": summary.license_consumed,
+                "hosts_added": summary.hosts_added,
+                "hosts_deleted": summary.hosts_deleted,
+            }
+
+    def below(self, months, fmt="date"):
+        """First day of the month lying `months` months before the threshold month (aware datetime for fmt="dt")"""
+        first_day = months_ago(self.threshold + months)
+        if fmt != "dt":
+            return first_day
+        return timezone.make_aware(datetime.datetime.combine(first_day, datetime.time.min))
+
+    def above(self, months, fmt="date"):
+        """First day of the month lying `months` months after the threshold month (aware datetime for fmt="dt")"""
+        first_day = months_ago(self.threshold - months)
+        if fmt != "dt":
+            return first_day
+        return timezone.make_aware(datetime.datetime.combine(first_day, datetime.time.min))
+
+
+class MetricsTesterOldData(MetricsTester):  # scenario where every host metric was first automated before the threshold
+    def create_metrics(self):
+        """Creates 7 host metrics first automated before the delete threshold (hosts 3-6 also carry a last_deleted date)"""
+        mk_host_metric("host_1", first_automation=self.below(3, "dt"))
+        mk_host_metric("host_2", first_automation=self.below(2, "dt"))
+        mk_host_metric("host_3", first_automation=self.below(2, "dt"), last_deleted=self.above(2, "dt"), deleted=False)
+        mk_host_metric("host_4", first_automation=self.below(2, "dt"), last_deleted=self.above(2, "dt"), deleted=True)
+        mk_host_metric("host_5", first_automation=self.below(2, "dt"), last_deleted=self.below(2, "dt"), deleted=True)
+        mk_host_metric("host_6", first_automation=self.below(1, "dt"), last_deleted=self.below(1, "dt"), deleted=False)
+        mk_host_metric("host_7", first_automation=self.below(1, "dt"))
+
+    def assert_create_metrics(self):
+        """
+        Month 1 is computed from the older host metrics,
+        Month 2 has one deletion (host_4),
+        The other months are unchanged (same as month 2)
+        """
+        self.expected_summaries = {
+            self.above(1): {"date": self.above(1), "license_consumed": 6, "hosts_added": 0, "hosts_deleted": 0},
+            self.above(2): {"date": self.above(2), "license_consumed": 5, "hosts_added": 0, "hosts_deleted": 1},
+        }
+        # no change in months 3+, up to and including the current month
+        idx = 3
+        month = self.above(idx)
+        while month <= beginning_of_the_month():
+            self.expected_summaries[self.above(idx)] = {"date": self.above(idx), "license_consumed": 5, "hosts_added": 0, "hosts_deleted": 0}
+            month += relativedelta(months=1)
+            idx += 1
+
+        self.assert_host_metric_summaries()
+
+    def add_old_summaries(self):
+        super().add_old_summaries()  # plain delegation to the base class
+
+    def assert_add_old_summaries(self):
+        super().assert_add_old_summaries()  # plain delegation to the base class
+
+    @staticmethod
+    def change_metrics():
+        """Hosts 1 and 2 are soft deleted, host_4 is automated again (undeleted)"""
+        HostMetric.objects.filter(hostname='host_1').update(last_deleted=beginning_of_the_month("dt"), deleted=True)
+        HostMetric.objects.filter(hostname='host_2').update(last_deleted=timezone.now(), deleted=True)
+        HostMetric.objects.filter(hostname='host_4').update(deleted=False)
+
+    def assert_change_metrics(self):
+        """
+        Summaries since month 2 were changed (host_4 restored == automated again)
+        Current month has 2 deletions (host_1, host_2)
+        """
+        self.expected_summaries[self.above(2)] |= {'hosts_deleted': 0}
+        for idx in range(2, self.threshold):  # months 2 .. threshold-1; the current month is handled after the loop
+            self.expected_summaries[self.above(idx)] |= {'license_consumed': 6}
+        self.expected_summaries[beginning_of_the_month()] |= {'license_consumed': 4, 'hosts_deleted': 2}
+
+        self.assert_host_metric_summaries()
+
+    @staticmethod
+    def delete_metrics():
+        """Deletes the metric (host_5) that was soft deleted before the threshold"""
+        HostMetric.objects.filter(hostname='host_5').delete()
+
+    def assert_delete_metrics(self):
+        """No change"""
+        self.assert_host_metric_summaries()
+
+    @staticmethod
+    def add_metrics():
+        """Adds two new metrics first automated at the beginning of the current month"""
+        mk_host_metric("host_24", first_automation=beginning_of_the_month("dt"))
+        mk_host_metric("host_25", first_automation=beginning_of_the_month("dt"))  # timezone.now())
+
+    def assert_add_metrics(self):
+        """Summary in current month is updated"""
+        self.expected_summaries[beginning_of_the_month()]['license_consumed'] = 6
+        self.expected_summaries[beginning_of_the_month()]['hosts_added'] = 2
+
+        self.assert_host_metric_summaries()
+
+
+class MetricsTesterActualData(MetricsTester):  # scenario where every host metric was first automated after the threshold
+    def create_metrics(self):
+        """Creates 16 host metrics (host_8 .. host_23) newer than the delete threshold"""
+        mk_host_metric("host_8", first_automation=self.above(1, "dt"))
+        mk_host_metric("host_9", first_automation=self.above(1, "dt"), last_deleted=self.above(1, "dt"))
+        mk_host_metric("host_10", first_automation=self.above(1, "dt"), last_deleted=self.above(1, "dt"), deleted=True)
+        mk_host_metric("host_11", first_automation=self.above(1, "dt"), last_deleted=self.above(2, "dt"))
+        mk_host_metric("host_12", first_automation=self.above(1, "dt"), last_deleted=self.above(2, "dt"), deleted=True)
+        mk_host_metric("host_13", first_automation=self.above(2, "dt"))
+        mk_host_metric("host_14", first_automation=self.above(2, "dt"), last_deleted=self.above(2, "dt"))
+        mk_host_metric("host_15", first_automation=self.above(2, "dt"), last_deleted=self.above(2, "dt"), deleted=True)
+        mk_host_metric("host_16", first_automation=self.above(2, "dt"), last_deleted=self.above(3, "dt"))
+        mk_host_metric("host_17", first_automation=self.above(2, "dt"), last_deleted=self.above(3, "dt"), deleted=True)
+        mk_host_metric("host_18", first_automation=self.above(4, "dt"))
+        # the next one shouldn't happen in reality (deleted=True, last_deleted = NULL)
+        mk_host_metric("host_19", first_automation=self.above(4, "dt"), deleted=True)
+        mk_host_metric("host_20", first_automation=self.above(4, "dt"), last_deleted=self.above(4, "dt"))
+        mk_host_metric("host_21", first_automation=self.above(4, "dt"), last_deleted=self.above(4, "dt"), deleted=True)
+        mk_host_metric("host_22", first_automation=self.above(4, "dt"), last_deleted=self.above(5, "dt"))
+        mk_host_metric("host_23", first_automation=self.above(4, "dt"), last_deleted=self.above(5, "dt"), deleted=True)
+
+    def assert_create_metrics(self):
+        self.expected_summaries = {
+            self.above(1): {"date": self.above(1), "license_consumed": 4, "hosts_added": 5, "hosts_deleted": 1},
+            self.above(2): {"date": self.above(2), "license_consumed": 7, "hosts_added": 5, "hosts_deleted": 2},
+            self.above(3): {"date": self.above(3), "license_consumed": 6, "hosts_added": 0, "hosts_deleted": 1},
+            self.above(4): {"date": self.above(4), "license_consumed": 11, "hosts_added": 6, "hosts_deleted": 1},
+            self.above(5): {"date": self.above(5), "license_consumed": 10, "hosts_added": 0, "hosts_deleted": 1},
+        }
+        # no change in months 6+, up to and including the current month
+        idx = 6
+        month = self.above(idx)
+        while month <= beginning_of_the_month():
+            self.expected_summaries[self.above(idx)] = {"date": self.above(idx), "license_consumed": 10, "hosts_added": 0, "hosts_deleted": 0}
+            month += relativedelta(months=1)
+            idx += 1
+
+        self.assert_host_metric_summaries()
+
+    def add_old_summaries(self):
+        super().add_old_summaries()  # plain delegation to the base class
+
+    def assert_add_old_summaries(self):
+        super().assert_add_old_summaries()  # plain delegation to the base class
+
+    @staticmethod
+    def change_metrics():
+        """
+        - Hosts 12, 19, 21 were automated again (undeleted)
+        - Host 16 was soft deleted
+        - Host 17 was undeleted and soft deleted again
+        """
+        HostMetric.objects.filter(hostname='host_12').update(deleted=False)
+        HostMetric.objects.filter(hostname='host_16').update(last_deleted=timezone.now(), deleted=True)
+        HostMetric.objects.filter(hostname='host_17').update(last_deleted=beginning_of_the_month("dt"), deleted=True)
+        HostMetric.objects.filter(hostname='host_19').update(deleted=False)
+        HostMetric.objects.filter(hostname='host_21').update(deleted=False)
+
+    def assert_change_metrics(self):
+        """
+        Summaries since month 2 were changed
+        Current month has 2 deletions (host_16, host_17)
+        """
+        self.expected_summaries[self.above(2)] |= {'license_consumed': 8, 'hosts_deleted': 1}
+        self.expected_summaries[self.above(3)] |= {'license_consumed': 8, 'hosts_deleted': 0}
+        self.expected_summaries[self.above(4)] |= {'license_consumed': 14, 'hosts_deleted': 0}
+
+        # month 5 had hosts_deleted 1 => license_consumed == 14 - 1
+        for idx in range(5, self.threshold):  # months 5 .. threshold-1; the current month is handled after the loop
+            self.expected_summaries[self.above(idx)] |= {'license_consumed': 13}
+        self.expected_summaries[beginning_of_the_month()] |= {'license_consumed': 11, 'hosts_deleted': 2}
+
+        self.assert_host_metric_summaries()
+
+    def delete_metrics(self):
+        """Hard cleanup can't delete metrics newer than the threshold, so nothing is deleted here"""
+        pass
+
+    def assert_delete_metrics(self):
+        """No change"""
+        self.assert_host_metric_summaries()
+
+    @staticmethod
+    def add_metrics():
+        """Adds two new metrics first automated in the current month"""
+        mk_host_metric("host_26", first_automation=beginning_of_the_month("dt"))
+        mk_host_metric("host_27", first_automation=timezone.now())
+
+    def assert_add_metrics(self):
+        """
+        Two metrics were deleted in current month by change_metrics()
+        Two metrics are added now
+        => license_consumed is equal to the previous month (13 - 2 + 2)
+        """
+        self.expected_summaries[beginning_of_the_month()] |= {'license_consumed': 13, 'hosts_added': 2}
+
+        self.assert_host_metric_summaries()
+
+
+class MetricsTesterCombinedData(MetricsTester):  # runs the old-data and actual-data scenarios against the same database
+    def __init__(self, threshold):
+        super().__init__(threshold)
+        self.old_data = MetricsTesterOldData(threshold, ignore_asserts=True)
+        self.actual_data = MetricsTesterActualData(threshold, ignore_asserts=True)
+
+    def assert_host_metric_summaries(self):
+        self._combine_expected_summaries()
+        super().assert_host_metric_summaries()
+
+    def create_metrics(self):
+        for part in (self.old_data, self.actual_data):
+            part.create_metrics()
+
+    def assert_create_metrics(self):
+        for part in (self.old_data, self.actual_data):
+            part.assert_create_metrics()
+
+        self.assert_host_metric_summaries()
+
+    def add_old_summaries(self):
+        super().add_old_summaries()
+
+    def assert_add_old_summaries(self):
+        for part in (self.old_data, self.actual_data):
+            part.assert_add_old_summaries()
+
+        self.assert_host_metric_summaries()
+
+    def change_metrics(self):
+        for part in (self.old_data, self.actual_data):
+            part.change_metrics()
+
+    def assert_change_metrics(self):
+        for part in (self.old_data, self.actual_data):
+            part.assert_change_metrics()
+
+        self.assert_host_metric_summaries()
+
+    def delete_metrics(self):
+        for part in (self.old_data, self.actual_data):
+            part.delete_metrics()
+
+    def assert_delete_metrics(self):
+        for part in (self.old_data, self.actual_data):
+            part.assert_delete_metrics()
+
+        self.assert_host_metric_summaries()
+
+    def add_metrics(self):
+        for part in (self.old_data, self.actual_data):
+            part.add_metrics()
+
+    def assert_add_metrics(self):
+        for part in (self.old_data, self.actual_data):
+            part.assert_add_metrics()
+
+        self.assert_host_metric_summaries()
+
+    def _combine_expected_summaries(self):
+        """
+        Build the combined expectation: each month is the field-wise sum of the old-data and actual-data expectations
+        Months at or before the hard delete threshold are copied from the old-data expectations alone
+        """
+        fields = ('license_consumed', 'hosts_added', 'hosts_deleted')
+        cutoff = months_ago(self.threshold)
+        for date, old_summary in self.old_data.expected_summaries.items():
+            combined = {'date': date}
+            for field in fields:
+                combined[field] = old_summary[field]
+                if date > cutoff:
+                    # newer than the threshold: add the actual-data expectation for the same month
+                    combined[field] += self.actual_data.expected_summaries[date][field]
+            self.expected_summaries[date] = combined
+
+
+def months_ago(num, fmt="date"):
+    """Beginning of the month lying `num` months before the current one; None when num is None"""
+    month_start = None if num is None else beginning_of_the_month(fmt) - relativedelta(months=num)
+    return month_start
+
+
+def beginning_of_the_month(fmt="date"):
+    """First day of the current month: an aware midnight datetime for fmt="dt", a plain date otherwise"""
+    first_day = datetime.date.today().replace(day=1)
+    if fmt != "dt":
+        return first_day
+    return timezone.make_aware(datetime.datetime.combine(first_day, datetime.time.min))
--- a/awx/main/tests/functional/task_management/test_scheduler.py
+++ b/awx/main/tests/functional/task_management/test_scheduler.py
@@ -331,15 +331,13 @@ def test_single_job_dependencies_project_launch(controlplane_instance_group, job
    p.save(skip_update=True)
    with mock.patch("awx.main.scheduler.TaskManager.start_task"):
        dm = DependencyManager()
-        with mock.patch.object(DependencyManager, "create_project_update", wraps=dm.create_project_update) as mock_pu:
-            dm.schedule()
-            mock_pu.assert_called_once_with(j)
-            pu = [x for x in p.project_updates.all()]
-            assert len(pu) == 1
-            TaskManager().schedule()
-            TaskManager.start_task.assert_called_once_with(pu[0], controlplane_instance_group, instance)
-            pu[0].status = "successful"
-            pu[0].save()
+        dm.schedule()
+        pu = [x for x in p.project_updates.all()]
+        assert len(pu) == 1
+        TaskManager().schedule()
+        TaskManager.start_task.assert_called_once_with(pu[0], controlplane_instance_group, instance)
+        pu[0].status = "successful"
+        pu[0].save()
    with mock.patch("awx.main.scheduler.TaskManager.start_task"):
        TaskManager().schedule()
        TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, instance)
@@ -359,15 +357,14 @@ def test_single_job_dependencies_inventory_update_launch(controlplane_instance_g
    i.inventory_sources.add(ii)
    with mock.patch("awx.main.scheduler.TaskManager.start_task"):
        dm = DependencyManager()
-        with mock.patch.object(DependencyManager, "create_inventory_update", wraps=dm.create_inventory_update) as mock_iu:
-            dm.schedule()
-            mock_iu.assert_called_once_with(j, ii)
-            iu = [x for x in ii.inventory_updates.all()]
-            assert len(iu) == 1
-            TaskManager().schedule()
-            TaskManager.start_task.assert_called_once_with(iu[0], controlplane_instance_group, instance)
-            iu[0].status = "successful"
-            iu[0].save()
+        dm.schedule()
+        assert ii.inventory_updates.count() == 1
+        iu = [x for x in ii.inventory_updates.all()]
+        assert len(iu) == 1
+        TaskManager().schedule()
+        TaskManager.start_task.assert_called_once_with(iu[0], controlplane_instance_group, instance)
+        iu[0].status = "successful"
+        iu[0].save()
    with mock.patch("awx.main.scheduler.TaskManager.start_task"):
        TaskManager().schedule()
        TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, instance)
@@ -382,11 +379,11 @@ def test_inventory_update_launches_project_update(controlplane_instance_group, s
    iu = ii.create_inventory_update()
    iu.status = "pending"
    iu.save()
+    assert project.project_updates.count() == 0
    with mock.patch("awx.main.scheduler.TaskManager.start_task"):
        dm = DependencyManager()
-        with mock.patch.object(DependencyManager, "create_project_update", wraps=dm.create_project_update) as mock_pu:
-            dm.schedule()
-            mock_pu.assert_called_with(iu, project_id=project.id)
+        dm.schedule()
+    assert project.project_updates.count() == 1


@pytest.mark.django_db
@@ -407,9 +404,8 @@ def test_job_dependency_with_already_updated(controlplane_instance_group, job_te
    j.save()
    with mock.patch("awx.main.scheduler.TaskManager.start_task"):
        dm = DependencyManager()
-        with mock.patch.object(DependencyManager, "create_inventory_update", wraps=dm.create_inventory_update) as mock_iu:
-            dm.schedule()
-            mock_iu.assert_not_called()
+        dm.schedule()
+        assert ii.inventory_updates.count() == 0
    with mock.patch("awx.main.scheduler.TaskManager.start_task"):
        TaskManager().schedule()
        TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, instance)
@@ -442,7 +438,9 @@ def test_shared_dependencies_launch(controlplane_instance_group, job_template_fa
        TaskManager().schedule()
        pu = p.project_updates.first()
        iu = ii.inventory_updates.first()
-        TaskManager.start_task.assert_has_calls([mock.call(iu, controlplane_instance_group, instance), mock.call(pu, controlplane_instance_group, instance)])
+        TaskManager.start_task.assert_has_calls(
+            [mock.call(iu, controlplane_instance_group, instance), mock.call(pu, controlplane_instance_group, instance)], any_order=True
+        )
        pu.status = "successful"
        pu.finished = pu.created + timedelta(seconds=1)
        pu.save()
@@ -451,7 +449,9 @@ def test_shared_dependencies_launch(controlplane_instance_group, job_template_fa
        iu.save()
    with mock.patch("awx.main.scheduler.TaskManager.start_task"):
        TaskManager().schedule()
-        TaskManager.start_task.assert_has_calls([mock.call(j1, controlplane_instance_group, instance), mock.call(j2, controlplane_instance_group, instance)])
+        TaskManager.start_task.assert_has_calls(
+            [mock.call(j1, controlplane_instance_group, instance), mock.call(j2, controlplane_instance_group, instance)], any_order=True
+        )
    pu = [x for x in p.project_updates.all()]
    iu = [x for x in ii.inventory_updates.all()]
    assert len(pu) == 1
--- a/awx/main/tests/functional/test_credential_plugins.py
+++ b/awx/main/tests/functional/test_credential_plugins.py
@@ -76,3 +76,24 @@ def test_hashivault_handle_auth_kubernetes():
 def test_hashivault_handle_auth_not_enough_args():
    with pytest.raises(Exception):
        hashivault.handle_auth()
+
+
+class TestDelineaImports:
+    """
+    These modules have a try-except for ImportError which allows using the older library,
+    but we do not want the awx_devel image to have the older library,
+    so these tests are designed to fail if the modules wind up using the fallback import
+    """
+
+    def test_dsv_import(self):
+        from awx.main.credential_plugins.dsv import SecretsVault  # noqa
+
+        # assert the class comes from this module as opposed to the older thycotic.secrets.vault
+        assert SecretsVault.__module__ == 'delinea.secrets.vault'
+
+    def test_tss_import(self):
+        from awx.main.credential_plugins.tss import DomainPasswordGrantAuthorizer, PasswordGrantAuthorizer, SecretServer, ServerSecret  # noqa
+
+        for cls in (DomainPasswordGrantAuthorizer, PasswordGrantAuthorizer, SecretServer, ServerSecret):
+            # assert the class comes from this module as opposed to the older thycotic.secrets.server
+            assert cls.__module__ == 'delinea.secrets.server'
--- a/awx/main/tests/functional/test_dispatch.py
+++ b/awx/main/tests/functional/test_dispatch.py
@@ -3,6 +3,7 @@ import multiprocessing
 import random
 import signal
 import time
+import yaml
 from unittest import mock

 from django.utils.timezone import now as tz_now
@@ -13,6 +14,7 @@ from awx.main.dispatch import reaper
 from awx.main.dispatch.pool import StatefulPoolWorker, WorkerPool, AutoscalePool
 from awx.main.dispatch.publish import task
 from awx.main.dispatch.worker import BaseWorker, TaskWorker
+from awx.main.dispatch.periodic import Scheduler


 '''
@@ -439,3 +441,76 @@ class TestJobReaper(object):
        assert job.started > ref_time
        assert job.status == 'running'
        assert job.job_explanation == ''
+
+
+@pytest.mark.django_db
+class TestScheduler:
+    def test_too_many_schedules_freak_out(self):
+        with pytest.raises(RuntimeError):  # building a Scheduler from these two 1-second jobs must raise
+            Scheduler({'job1': {'schedule': datetime.timedelta(seconds=1)}, 'job2': {'schedule': datetime.timedelta(seconds=1)}})
+
+    def test_spread_out(self):
+        scheduler = Scheduler(
+            {
+                'job1': {'schedule': datetime.timedelta(seconds=16)},
+                'job2': {'schedule': datetime.timedelta(seconds=16)},
+                'job3': {'schedule': datetime.timedelta(seconds=16)},
+                'job4': {'schedule': datetime.timedelta(seconds=16)},
+            }
+        )
+        assert [job.offset for job in scheduler.jobs] == [0, 4, 8, 12]  # four 16-second jobs get offsets 4 seconds apart
+
+    def test_missed_schedule(self, mocker):
+        scheduler = Scheduler({'job1': {'schedule': datetime.timedelta(seconds=10)}})
+        assert scheduler.jobs[0].missed_runs(time.time() - scheduler.global_start) == 0
+        mocker.patch('awx.main.dispatch.periodic.time.time', return_value=scheduler.global_start + 50)  # pretend 50 seconds passed
+        scheduler.get_and_mark_pending()
+        assert scheduler.jobs[0].missed_runs(50) > 1
+
+    def test_advance_schedule(self, mocker):
+        scheduler = Scheduler(
+            {
+                'job1': {'schedule': datetime.timedelta(seconds=30)},
+                'joba': {'schedule': datetime.timedelta(seconds=20)},
+                'jobb': {'schedule': datetime.timedelta(seconds=20)},
+            }
+        )
+        for job in scheduler.jobs:
+            # HACK: the automatically added offsets make this a hard test to write... so remove the offsets
+            job.offset = 0.0
+        mocker.patch('awx.main.dispatch.periodic.time.time', return_value=scheduler.global_start + 29)
+        to_run = scheduler.get_and_mark_pending()
+        assert set(job.name for job in to_run) == set(['joba', 'jobb'])  # at 29 seconds only the 20-second jobs are pending
+        mocker.patch('awx.main.dispatch.periodic.time.time', return_value=scheduler.global_start + 39)
+        to_run = scheduler.get_and_mark_pending()
+        assert len(to_run) == 1
+        assert to_run[0].name == 'job1'
+
+    @staticmethod
+    def get_job(scheduler, name):  # returns the job with the given name; falls through to None when no job matches
+        for job in scheduler.jobs:
+            if job.name == name:
+                return job
+
+    def test_scheduler_debug(self, mocker):
+        scheduler = Scheduler(
+            {
+                'joba': {'schedule': datetime.timedelta(seconds=20)},
+                'jobb': {'schedule': datetime.timedelta(seconds=50)},
+                'jobc': {'schedule': datetime.timedelta(seconds=500)},
+                'jobd': {'schedule': datetime.timedelta(seconds=20)},
+            }
+        )
+        rel_time = 119.9  # slightly under the 6th 20-second bin, to avoid offset problems
+        current_time = scheduler.global_start + rel_time
+        mocker.patch('awx.main.dispatch.periodic.time.time', return_value=current_time - 1.0e-8)
+        self.get_job(scheduler, 'jobb').mark_run(rel_time)
+        self.get_job(scheduler, 'jobd').mark_run(rel_time - 20.0)
+
+        output = scheduler.debug()
+        data = yaml.safe_load(output)  # the debug output is parsed as YAML
+        assert data['schedule_list']['jobc']['last_run_seconds_ago'] is None
+        assert data['schedule_list']['joba']['missed_runs'] == 4
+        assert data['schedule_list']['jobd']['missed_runs'] == 3
+        assert data['schedule_list']['jobd']['completed_runs'] == 1
+        assert data['schedule_list']['jobb']['next_run_in_seconds'] > 25.0
--- a/awx/main/tests/functional/test_jobs.py
+++ b/awx/main/tests/functional/test_jobs.py
@@ -6,6 +6,7 @@ import json
 from awx.main.models import (
    Job,
    Instance,
+    Host,
    JobHostSummary,
    InventoryUpdate,
    InventorySource,
@@ -18,6 +19,9 @@ from awx.main.models import (
    ExecutionEnvironment,
 )
 from awx.main.tasks.system import cluster_node_heartbeat
+from awx.main.tasks.facts import update_hosts
+
+from django.db import OperationalError
 from django.test.utils import override_settings


@@ -33,9 +37,9 @@ def test_orphan_unified_job_creation(instance, inventory):


@pytest.mark.django_db
-@mock.patch('awx.main.tasks.system.inspect_execution_nodes', lambda *args, **kwargs: None)
-@mock.patch('awx.main.models.ha.get_cpu_effective_capacity', lambda cpu: 8)
-@mock.patch('awx.main.models.ha.get_mem_effective_capacity', lambda mem: 62)
+@mock.patch('awx.main.tasks.system.inspect_execution_and_hop_nodes', lambda *args, **kwargs: None)
+@mock.patch('awx.main.models.ha.get_cpu_effective_capacity', lambda cpu, is_control_node: 8)
+@mock.patch('awx.main.models.ha.get_mem_effective_capacity', lambda mem, is_control_node: 62)
 def test_job_capacity_and_with_inactive_node():
    i = Instance.objects.create(hostname='test-1')
    i.save_health_data('18.0.1', 2, 8000)
@@ -112,6 +116,51 @@ def test_job_notification_host_data(inventory, machine_credential, project, job_
    }


+@pytest.mark.django_db
+class TestAnsibleFactsSave:
+    current_call = 0  # number of simulated deadlocks raised so far by fake_bulk_update
+
+    def test_update_hosts_deleted_host(self, inventory):
+        hosts = [Host.objects.create(inventory=inventory, name=f'foo{i}') for i in range(3)]
+        for host in hosts:
+            host.ansible_facts = {'foo': 'bar'}
+        last_pk = hosts[-1].pk
+        assert inventory.hosts.count() == 3
+        Host.objects.get(pk=last_pk).delete()
+        assert inventory.hosts.count() == 2
+        update_hosts(hosts)  # the list still contains the host deleted above
+        assert inventory.hosts.count() == 2
+        for host in inventory.hosts.all():
+            host.refresh_from_db()
+            assert host.ansible_facts == {'foo': 'bar'}
+
+    def test_update_hosts_forever_deadlock(self, inventory, mocker):
+        hosts = [Host.objects.create(inventory=inventory, name=f'foo{i}') for i in range(3)]
+        for host in hosts:
+            host.ansible_facts = {'foo': 'bar'}
+        db_mock = mocker.patch('awx.main.tasks.facts.Host.objects.bulk_update')
+        db_mock.side_effect = OperationalError('deadlock detected')  # every bulk_update call fails
+        with pytest.raises(OperationalError):
+            update_hosts(hosts)
+
+    def fake_bulk_update(self, host_list):
+        if self.current_call > 2:  # after three simulated deadlocks, perform the real update
+            return Host.objects.bulk_update(host_list, ['ansible_facts', 'ansible_facts_modified'])
+        self.current_call += 1
+        raise OperationalError('deadlock detected')
+
+    def test_update_hosts_resolved_deadlock(self, inventory, mocker):
+        hosts = [Host.objects.create(inventory=inventory, name=f'foo{i}') for i in range(3)]
+        for host in hosts:
+            host.ansible_facts = {'foo': 'bar'}
+        self.current_call = 0
+        mocker.patch('awx.main.tasks.facts.raw_update_hosts', new=self.fake_bulk_update)
+        update_hosts(hosts)  # must succeed although the first three update attempts raise
+        for host in inventory.hosts.all():
+            host.refresh_from_db()
+            assert host.ansible_facts == {'foo': 'bar'}
+
+
@pytest.mark.django_db
 class TestLaunchConfig:
    def test_null_creation_from_prompts(self):
--- a/awx/main/tests/functional/test_tasks.py
+++ b/awx/main/tests/functional/test_tasks.py
@@ -5,8 +5,8 @@ import tempfile
 import shutil

 from awx.main.tasks.jobs import RunJob
-from awx.main.tasks.system import execution_node_health_check, _cleanup_images_and_files, handle_work_error
-from awx.main.models import Instance, Job, InventoryUpdate, ProjectUpdate
+from awx.main.tasks.system import execution_node_health_check, _cleanup_images_and_files
+from awx.main.models import Instance, Job


@pytest.fixture
@@ -73,17 +73,3 @@ def test_does_not_run_reaped_job(mocker, mock_me):
    job.refresh_from_db()
    assert job.status == 'failed'
    mock_run.assert_not_called()
-
-
-@pytest.mark.django_db
-def test_handle_work_error_nested(project, inventory_source):
-    pu = ProjectUpdate.objects.create(status='failed', project=project, celery_task_id='1234')
-    iu = InventoryUpdate.objects.create(status='pending', inventory_source=inventory_source, source='scm')
-    job = Job.objects.create(status='pending')
-    iu.dependent_jobs.add(pu)
-    job.dependent_jobs.add(pu, iu)
-    handle_work_error({'type': 'project_update', 'id': pu.id})
-    iu.refresh_from_db()
-    job.refresh_from_db()
-    assert iu.job_explanation == f'Previous Task Failed: {{"job_type": "project_update", "job_name": "", "job_id": "{pu.id}"}}'
-    assert job.job_explanation == f'Previous Task Failed: {{"job_type": "inventory_update", "job_name": "", "job_id": "{iu.id}"}}'
--- a/awx/main/tests/unit/api/test_logger.py
+++ b/awx/main/tests/unit/api/test_logger.py
@@ -47,7 +47,7 @@ data_loggly = {
            '\n'.join(
                [
                    'template(name="awx" type="string" string="%rawmsg-after-pri%")\nmodule(load="omhttp")',
-                    'action(type="omhttp" server="logs-01.loggly.com" serverport="80" usehttps="off" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxdiskspace="1g" queue.type="LinkedList" queue.saveOnShutdown="on" errorfile="/var/log/tower/rsyslog.err" restpath="inputs/1fd38090-2af1-4e1e-8d80-492899da0f71/tag/http/")',  # noqa
+                    'action(type="omhttp" server="logs-01.loggly.com" serverport="80" usehttps="off" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxDiskSpace="1g" queue.maxFileSize="100m" queue.type="LinkedList" queue.saveOnShutdown="on" queue.syncqueuefiles="on" queue.checkpointInterval="1000" queue.size="131072" queue.highwaterMark="98304" queue.discardMark="117964" queue.discardSeverity="5" errorfile="/var/log/tower/rsyslog.err" restpath="inputs/1fd38090-2af1-4e1e-8d80-492899da0f71/tag/http/")',  # noqa
                ]
            ),
        ),
@@ -61,7 +61,7 @@ data_loggly = {
            '\n'.join(
                [
                    'template(name="awx" type="string" string="%rawmsg-after-pri%")',
-                    'action(type="omfwd" target="localhost" port="9000" protocol="udp" action.resumeRetryCount="-1" action.resumeInterval="5" template="awx")',  # noqa
+                    'action(type="omfwd" target="localhost" port="9000" protocol="udp" action.resumeRetryCount="-1" action.resumeInterval="5" template="awx" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxDiskSpace="1g" queue.maxFileSize="100m" queue.type="LinkedList" queue.saveOnShutdown="on" queue.syncqueuefiles="on" queue.checkpointInterval="1000" queue.size="131072" queue.highwaterMark="98304" queue.discardMark="117964" queue.discardSeverity="5")',  # noqa
                ]
            ),
        ),
@@ -75,7 +75,7 @@ data_loggly = {
            '\n'.join(
                [
                    'template(name="awx" type="string" string="%rawmsg-after-pri%")',
-                    'action(type="omfwd" target="localhost" port="9000" protocol="tcp" action.resumeRetryCount="-1" action.resumeInterval="5" template="awx")',  # noqa
+                    'action(type="omfwd" target="localhost" port="9000" protocol="tcp" action.resumeRetryCount="-1" action.resumeInterval="5" template="awx" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxDiskSpace="1g" queue.maxFileSize="100m" queue.type="LinkedList" queue.saveOnShutdown="on" queue.syncqueuefiles="on" queue.checkpointInterval="1000" queue.size="131072" queue.highwaterMark="98304" queue.discardMark="117964" queue.discardSeverity="5")',  # noqa
                ]
            ),
        ),
@@ -89,7 +89,7 @@ data_loggly = {
            '\n'.join(
                [
                    'template(name="awx" type="string" string="%rawmsg-after-pri%")\nmodule(load="omhttp")',
-                    'action(type="omhttp" server="yoursplunk" serverport="443" usehttps="on" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxdiskspace="1g" queue.type="LinkedList" queue.saveOnShutdown="on" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
+                    'action(type="omhttp" server="yoursplunk" serverport="443" usehttps="on" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxDiskSpace="1g" queue.maxFileSize="100m" queue.type="LinkedList" queue.saveOnShutdown="on" queue.syncqueuefiles="on" queue.checkpointInterval="1000" queue.size="131072" queue.highwaterMark="98304" queue.discardMark="117964" queue.discardSeverity="5" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
                ]
            ),
        ),
@@ -103,7 +103,7 @@ data_loggly = {
            '\n'.join(
                [
                    'template(name="awx" type="string" string="%rawmsg-after-pri%")\nmodule(load="omhttp")',
-                    'action(type="omhttp" server="yoursplunk" serverport="80" usehttps="off" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxdiskspace="1g" queue.type="LinkedList" queue.saveOnShutdown="on" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
+                    'action(type="omhttp" server="yoursplunk" serverport="80" usehttps="off" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxDiskSpace="1g" queue.maxFileSize="100m" queue.type="LinkedList" queue.saveOnShutdown="on" queue.syncqueuefiles="on" queue.checkpointInterval="1000" queue.size="131072" queue.highwaterMark="98304" queue.discardMark="117964" queue.discardSeverity="5" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
                ]
            ),
        ),
@@ -117,7 +117,7 @@ data_loggly = {
            '\n'.join(
                [
                    'template(name="awx" type="string" string="%rawmsg-after-pri%")\nmodule(load="omhttp")',
-                    'action(type="omhttp" server="yoursplunk" serverport="8088" usehttps="on" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxdiskspace="1g" queue.type="LinkedList" queue.saveOnShutdown="on" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
+                    'action(type="omhttp" server="yoursplunk" serverport="8088" usehttps="on" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxDiskSpace="1g" queue.maxFileSize="100m" queue.type="LinkedList" queue.saveOnShutdown="on" queue.syncqueuefiles="on" queue.checkpointInterval="1000" queue.size="131072" queue.highwaterMark="98304" queue.discardMark="117964" queue.discardSeverity="5" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
                ]
            ),
        ),
@@ -131,7 +131,7 @@ data_loggly = {
            '\n'.join(
                [
                    'template(name="awx" type="string" string="%rawmsg-after-pri%")\nmodule(load="omhttp")',
-                    'action(type="omhttp" server="yoursplunk" serverport="8088" usehttps="on" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxdiskspace="1g" queue.type="LinkedList" queue.saveOnShutdown="on" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
+                    'action(type="omhttp" server="yoursplunk" serverport="8088" usehttps="on" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxDiskSpace="1g" queue.maxFileSize="100m" queue.type="LinkedList" queue.saveOnShutdown="on" queue.syncqueuefiles="on" queue.checkpointInterval="1000" queue.size="131072" queue.highwaterMark="98304" queue.discardMark="117964" queue.discardSeverity="5" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
                ]
            ),
        ),
@@ -145,7 +145,7 @@ data_loggly = {
            '\n'.join(
                [
                    'template(name="awx" type="string" string="%rawmsg-after-pri%")\nmodule(load="omhttp")',
-                    'action(type="omhttp" server="yoursplunk.org" serverport="8088" usehttps="on" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxdiskspace="1g" queue.type="LinkedList" queue.saveOnShutdown="on" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
+                    'action(type="omhttp" server="yoursplunk.org" serverport="8088" usehttps="on" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxDiskSpace="1g" queue.maxFileSize="100m" queue.type="LinkedList" queue.saveOnShutdown="on" queue.syncqueuefiles="on" queue.checkpointInterval="1000" queue.size="131072" queue.highwaterMark="98304" queue.discardMark="117964" queue.discardSeverity="5" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
                ]
            ),
        ),
@@ -159,7 +159,7 @@ data_loggly = {
            '\n'.join(
                [
                    'template(name="awx" type="string" string="%rawmsg-after-pri%")\nmodule(load="omhttp")',
-                    'action(type="omhttp" server="yoursplunk.org" serverport="8088" usehttps="off" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxdiskspace="1g" queue.type="LinkedList" queue.saveOnShutdown="on" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
+                    'action(type="omhttp" server="yoursplunk.org" serverport="8088" usehttps="off" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxDiskSpace="1g" queue.maxFileSize="100m" queue.type="LinkedList" queue.saveOnShutdown="on" queue.syncqueuefiles="on" queue.checkpointInterval="1000" queue.size="131072" queue.highwaterMark="98304" queue.discardMark="117964" queue.discardSeverity="5" errorfile="/var/log/tower/rsyslog.err" restpath="services/collector/event")',  # noqa
                ]
            ),
        ),
@@ -173,7 +173,7 @@ data_loggly = {
            '\n'.join(
                [
                    'template(name="awx" type="string" string="%rawmsg-after-pri%")\nmodule(load="omhttp")',
-                    'action(type="omhttp" server="endpoint5.collection.us2.sumologic.com" serverport="443" usehttps="on" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxdiskspace="1g" queue.type="LinkedList" queue.saveOnShutdown="on" errorfile="/var/log/tower/rsyslog.err" restpath="receiver/v1/http/ZaVnC4dhaV0qoiETY0MrM3wwLoDgO1jFgjOxE6-39qokkj3LGtOroZ8wNaN2M6DtgYrJZsmSi4-36_Up5TbbN_8hosYonLKHSSOSKY845LuLZBCBwStrHQ==")',  # noqa
+                    'action(type="omhttp" server="endpoint5.collection.us2.sumologic.com" serverport="443" usehttps="on" allowunsignedcerts="off" skipverifyhost="off" action.resumeRetryCount="-1" template="awx" action.resumeInterval="5" queue.spoolDirectory="/var/lib/awx" queue.filename="awx-external-logger-action-queue" queue.maxDiskSpace="1g" queue.maxFileSize="100m" queue.type="LinkedList" queue.saveOnShutdown="on" queue.syncqueuefiles="on" queue.checkpointInterval="1000" queue.size="131072" queue.highwaterMark="98304" queue.discardMark="117964" queue.discardSeverity="5" errorfile="/var/log/tower/rsyslog.err" restpath="receiver/v1/http/ZaVnC4dhaV0qoiETY0MrM3wwLoDgO1jFgjOxE6-39qokkj3LGtOroZ8wNaN2M6DtgYrJZsmSi4-36_Up5TbbN_8hosYonLKHSSOSKY845LuLZBCBwStrHQ==")',  # noqa
                ]
            ),
        ),
--- a/awx/main/tests/unit/settings/test_k8s_resource_setttings.py
+++ b/awx/main/tests/unit/settings/test_k8s_resource_setttings.py
@@ -36,7 +36,9 @@ def test_SYSTEM_TASK_ABS_MEM_conversion(value, converted_value, mem_capacity):
        mock_settings.IS_K8S = True
        assert convert_mem_str_to_bytes(value) == converted_value
        assert get_corrected_memory(-1) == converted_value
-        assert get_mem_effective_capacity(-1) == mem_capacity
+        assert get_mem_effective_capacity(1, is_control_node=True) == mem_capacity
+        # SYSTEM_TASK_ABS_MEM should not affect memory and capacity for execution nodes
+        assert get_mem_effective_capacity(2147483648, is_control_node=False) == 20


@pytest.mark.parametrize(
@@ -58,4 +60,6 @@ def test_SYSTEM_TASK_ABS_CPU_conversion(value, converted_value, cpu_capacity):
        mock_settings.SYSTEM_TASK_FORKS_CPU = 4
        assert convert_cpu_str_to_decimal_cpu(value) == converted_value
        assert get_corrected_cpu(-1) == converted_value
-        assert get_cpu_effective_capacity(-1) == cpu_capacity
+        assert get_cpu_effective_capacity(-1, is_control_node=True) == cpu_capacity
+        # SYSTEM_TASK_ABS_CPU should not affect cpu count and capacity for execution nodes
+        assert get_cpu_effective_capacity(2.0, is_control_node=False) == 8
--- a/awx/main/tests/unit/tasks/test_signals.py
+++ b/awx/main/tests/unit/tasks/test_signals.py
@@ -1,8 +1,43 @@
 import signal
+import functools

 from awx.main.tasks.signals import signal_state, signal_callback, with_signal_handling


+def pytest_sigint(*_signal_args):  # counting SIGINT stub; tolerates the (signum, frame) a real delivery passes
+    pytest_sigint.called_count += 1
+
+
+def pytest_sigterm(*_signal_args):  # counting SIGTERM stub; tolerates the (signum, frame) a real delivery passes
+    pytest_sigterm.called_count += 1
+
+
+def tmp_signals_for_test(func):
+    """
+    Decorator that runs ``func`` with SIGTERM and SIGINT temporarily bound to
+    the counting stubs ``pytest_sigterm`` / ``pytest_sigint``, because our
+    internal signal handlers call the original handlers when their own work is
+    finished, and the real ones would shut down the test runner. Counters are
+    zeroed first; the old handlers are restored even when ``func`` raises.
+    """
+
+    @functools.wraps(func)
+    def wrapper():
+        original_sigterm = signal.getsignal(signal.SIGTERM)
+        original_sigint = signal.getsignal(signal.SIGINT)
+        signal.signal(signal.SIGTERM, pytest_sigterm)
+        signal.signal(signal.SIGINT, pytest_sigint)
+        pytest_sigterm.called_count = pytest_sigint.called_count = 0
+        try:
+            func()
+        finally:
+            signal.signal(signal.SIGTERM, original_sigterm)
+            signal.signal(signal.SIGINT, original_sigint)
+
+    return wrapper
+
+
+@tmp_signals_for_test
 def test_outer_inner_signal_handling():
    """
    Even if the flag is set in the outer context, its value should persist in the inner context
@@ -15,17 +50,22 @@ def test_outer_inner_signal_handling():
    @with_signal_handling
    def f1():
        assert signal_callback() is False
-        signal_state.set_flag()
+        signal_state.set_sigterm_flag()
        assert signal_callback()
        f2()

    original_sigterm = signal.getsignal(signal.SIGTERM)
    assert signal_callback() is False
+    assert pytest_sigterm.called_count == 0
+    assert pytest_sigint.called_count == 0
    f1()
    assert signal_callback() is False
    assert signal.getsignal(signal.SIGTERM) is original_sigterm
+    assert pytest_sigterm.called_count == 1
+    assert pytest_sigint.called_count == 0


+@tmp_signals_for_test
 def test_inner_outer_signal_handling():
    """
    Even if the flag is set in the inner context, its value should persist in the outer context
@@ -34,7 +74,7 @@ def test_inner_outer_signal_handling():
    @with_signal_handling
    def f2():
        assert signal_callback() is False
-        signal_state.set_flag()
+        signal_state.set_sigint_flag()
        assert signal_callback()

    @with_signal_handling
@@ -45,6 +85,10 @@ def test_inner_outer_signal_handling():

    original_sigterm = signal.getsignal(signal.SIGTERM)
    assert signal_callback() is False
+    assert pytest_sigterm.called_count == 0
+    assert pytest_sigint.called_count == 0
    f1()
    assert signal_callback() is False
    assert signal.getsignal(signal.SIGTERM) is original_sigterm
+    assert pytest_sigterm.called_count == 0
+    assert pytest_sigint.called_count == 1
--- a/awx/main/tests/unit/test_tasks.py
+++ b/awx/main/tests/unit/test_tasks.py
@@ -143,13 +143,6 @@ def test_send_notifications_job_id(mocker):
        assert UnifiedJob.objects.get.called_with(id=1)


-def test_work_success_callback_missing_job():
-    task_data = {'type': 'project_update', 'id': 9999}
-    with mock.patch('django.db.models.query.QuerySet.get') as get_mock:
-        get_mock.side_effect = ProjectUpdate.DoesNotExist()
-        assert system.handle_work_success(task_data) is None
-
-
@mock.patch('awx.main.models.UnifiedJob.objects.get')
@mock.patch('awx.main.models.Notification.objects.filter')
 def test_send_notifications_list(mock_notifications_filter, mock_job_get, mocker):
--- a/awx/main/utils/common.py
+++ b/awx/main/utils/common.py
@@ -23,7 +23,7 @@ from django.core.exceptions import ObjectDoesNotExist, FieldDoesNotExist
 from django.utils.dateparse import parse_datetime
 from django.utils.translation import gettext_lazy as _
 from django.utils.functional import cached_property
-from django.db import connection, transaction, ProgrammingError
+from django.db import connection, transaction, ProgrammingError, IntegrityError
 from django.db.models.fields.related import ForeignObjectRel, ManyToManyField
 from django.db.models.fields.related_descriptors import ForwardManyToOneDescriptor, ManyToManyDescriptor
 from django.db.models.query import QuerySet
@@ -768,14 +768,13 @@ def get_corrected_cpu(cpu_count):  # formerlly get_cpu_capacity
    return cpu_count  # no correction


-def get_cpu_effective_capacity(cpu_count):
+def get_cpu_effective_capacity(cpu_count, is_control_node=False):
    from django.conf import settings

-    cpu_count = get_corrected_cpu(cpu_count)
-
    settings_forkcpu = getattr(settings, 'SYSTEM_TASK_FORKS_CPU', None)
    env_forkcpu = os.getenv('SYSTEM_TASK_FORKS_CPU', None)
-
+    if is_control_node:
+        cpu_count = get_corrected_cpu(cpu_count)
    if env_forkcpu:
        forkcpu = int(env_forkcpu)
    elif settings_forkcpu:
@@ -834,6 +833,7 @@ def get_corrected_memory(memory):

    # Runner returns memory in bytes
    # so we convert memory from settings to bytes as well.
+
    if env_absmem is not None:
        return convert_mem_str_to_bytes(env_absmem)
    elif settings_absmem is not None:
@@ -842,14 +842,13 @@ def get_corrected_memory(memory):
    return memory


-def get_mem_effective_capacity(mem_bytes):
+def get_mem_effective_capacity(mem_bytes, is_control_node=False):
    from django.conf import settings

-    mem_bytes = get_corrected_memory(mem_bytes)
-
    settings_mem_mb_per_fork = getattr(settings, 'SYSTEM_TASK_FORKS_MEM', None)
    env_mem_mb_per_fork = os.getenv('SYSTEM_TASK_FORKS_MEM', None)
-
+    if is_control_node:
+        mem_bytes = get_corrected_memory(mem_bytes)
    if env_mem_mb_per_fork:
        mem_mb_per_fork = int(env_mem_mb_per_fork)
    elif settings_mem_mb_per_fork:
@@ -1165,13 +1164,24 @@ def create_partition(tblname, start=None):
    try:
        with transaction.atomic():
            with connection.cursor() as cursor:
+                cursor.execute(f"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = '{tblname}_{partition_label}');")
+                row = cursor.fetchone()
+                if row is not None:
+                    for val in row:  # should only have 1
+                        if val is True:
+                            logger.debug(f'Event partition table {tblname}_{partition_label} already exists')
+                            return
+
                cursor.execute(
-                    f'CREATE TABLE IF NOT EXISTS {tblname}_{partition_label} '
-                    f'PARTITION OF {tblname} '
-                    f'FOR VALUES FROM (\'{start_timestamp}\') to (\'{end_timestamp}\');'
+                    f'CREATE TABLE {tblname}_{partition_label} (LIKE {tblname} INCLUDING DEFAULTS INCLUDING CONSTRAINTS); '
+                    f'ALTER TABLE {tblname} ATTACH PARTITION {tblname}_{partition_label} '
+                    f'FOR VALUES FROM (\'{start_timestamp}\') TO (\'{end_timestamp}\');'
                )
-    except ProgrammingError as e:
-        logger.debug(f'Caught known error due to existing partition: {e}')
+    except (ProgrammingError, IntegrityError) as e:
+        if 'already exists' in str(e):
+            logger.info(f'Caught known error due to partition creation race: {e}')
+        else:
+            raise


 def cleanup_new_process(func):
--- a/awx/main/utils/external_logging.py
+++ b/awx/main/utils/external_logging.py
@@ -17,11 +17,26 @@ def construct_rsyslog_conf_template(settings=settings):
    port = getattr(settings, 'LOG_AGGREGATOR_PORT', '')
    protocol = getattr(settings, 'LOG_AGGREGATOR_PROTOCOL', '')
    timeout = getattr(settings, 'LOG_AGGREGATOR_TCP_TIMEOUT', 5)
-    max_disk_space_main_queue = getattr(settings, 'LOG_AGGREGATOR_MAX_DISK_USAGE_GB', 1)
+    action_queue_size = getattr(settings, 'LOG_AGGREGATOR_ACTION_QUEUE_SIZE', 131072)
    max_disk_space_action_queue = getattr(settings, 'LOG_AGGREGATOR_ACTION_MAX_DISK_USAGE_GB', 1)
    spool_directory = getattr(settings, 'LOG_AGGREGATOR_MAX_DISK_USAGE_PATH', '/var/lib/awx').rstrip('/')
    error_log_file = getattr(settings, 'LOG_AGGREGATOR_RSYSLOGD_ERROR_LOG_FILE', '')

+    queue_options = [
+        f'queue.spoolDirectory="{spool_directory}"',
+        'queue.filename="awx-external-logger-action-queue"',
+        f'queue.maxDiskSpace="{max_disk_space_action_queue}g"',  # overall disk space for all queue files
+        'queue.maxFileSize="100m"',  # individual file size
+        'queue.type="LinkedList"',
+        'queue.saveOnShutdown="on"',
+        'queue.syncqueuefiles="on"',  # (f)sync when checkpoint occurs
+        'queue.checkpointInterval="1000"',  # Update disk queue every 1000 messages
+        f'queue.size="{action_queue_size}"',  # max number of messages in queue
+        f'queue.highwaterMark="{int(action_queue_size * 0.75)}"',  # 75% of queue.size
+        f'queue.discardMark="{int(action_queue_size * 0.9)}"',  # 90% of queue.size
+        'queue.discardSeverity="5"',  # Only discard notice, info, debug if we must discard anything
+    ]
+
    if not os.access(spool_directory, os.W_OK):
        spool_directory = '/var/lib/awx'

@@ -33,7 +48,6 @@ def construct_rsyslog_conf_template(settings=settings):
            '$WorkDirectory /var/lib/awx/rsyslog',
            f'$MaxMessageSize {max_bytes}',
            '$IncludeConfig /var/lib/awx/rsyslog/conf.d/*.conf',
-            f'main_queue(queue.spoolDirectory="{spool_directory}" queue.maxdiskspace="{max_disk_space_main_queue}g" queue.type="Disk" queue.filename="awx-external-logger-backlog")',  # noqa
            'module(load="imuxsock" SysSock.Use="off")',
            'input(type="imuxsock" Socket="' + settings.LOGGING['handlers']['external_logger']['address'] + '" unlink="on" RateLimit.Burst="0")',
            'template(name="awx" type="string" string="%rawmsg-after-pri%")',
@@ -79,12 +93,7 @@ def construct_rsyslog_conf_template(settings=settings):
            'action.resumeRetryCount="-1"',
            'template="awx"',
            f'action.resumeInterval="{timeout}"',
-            f'queue.spoolDirectory="{spool_directory}"',
-            'queue.filename="awx-external-logger-action-queue"',
-            f'queue.maxdiskspace="{max_disk_space_action_queue}g"',
-            'queue.type="LinkedList"',
-            'queue.saveOnShutdown="on"',
-        ]
+        ] + queue_options
        if error_log_file:
            params.append(f'errorfile="{error_log_file}"')
        if parsed.path:
@@ -112,9 +121,18 @@ def construct_rsyslog_conf_template(settings=settings):
        params = ' '.join(params)
        parts.extend(['module(load="omhttp")', f'action({params})'])
    elif protocol and host and port:
-        parts.append(
-            f'action(type="omfwd" target="{host}" port="{port}" protocol="{protocol}" action.resumeRetryCount="-1" action.resumeInterval="{timeout}" template="awx")'  # noqa
-        )
+        params = [
+            'type="omfwd"',
+            f'target="{host}"',
+            f'port="{port}"',
+            f'protocol="{protocol}"',
+            'action.resumeRetryCount="-1"',
+            f'action.resumeInterval="{timeout}"',
+            'template="awx"',
+        ] + queue_options
+        params = ' '.join(params)
+        parts.append(f'action({params})')
+
    else:
        parts.append('action(type="omfile" file="/dev/null")')  # rsyslog needs *at least* one valid action to start
    tmpl = '\n'.join(parts)
--- a/Show More
+++ b/Show More