Compare commits

..

1 Commits

Author SHA1 Message Date
Alex Corey
d87c091eea Refactors the project form 2022-10-05 15:43:01 -04:00
657 changed files with 17493 additions and 21347 deletions

View File

@@ -53,16 +53,6 @@ https://github.com/ansible/awx/#get-involved \
Thank you once again for this and your interest in AWX!
### Red Hat Support Team
- Hi! \
\
It appears that you are using an RPM build for RHEL. Please reach out to the Red Hat support team and submit a ticket. \
\
Here is the link to do so: \
\
https://access.redhat.com/support \
\
Thank you for your submission and for supporting AWX!
## Common
@@ -106,13 +96,6 @@ The Ansible Community is looking at building an EE that corresponds to all of th
### Oracle AWX
We'd be happy to help if you can reproduce this with AWX since we do not have Oracle's Linux Automation Manager. If you need help with this specific version of Oracle's Linux Automation Manager, you will need to contact Oracle for support.
### Community Resolved
Hi,
We are happy to see that it appears a fix has been provided for your issue, so we will go ahead and close this ticket. Please feel free to reopen if any other problems arise.
<name of community member who helped> thanks so much for taking the time to write a thoughtful and helpful response to this issue!
### AWX Release
Subject: Announcing AWX Xa.Ya.za and AWX-Operator Xb.Yb.zb

View File

@@ -2,7 +2,6 @@
name: CI
env:
BRANCH: ${{ github.base_ref || 'devel' }}
LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
on:
pull_request:
jobs:
@@ -145,22 +144,3 @@ jobs:
env:
AWX_TEST_IMAGE: awx
AWX_TEST_VERSION: ci
collection-sanity:
name: awx_collection sanity
runs-on: ubuntu-latest
strategy:
fail-fast: false
steps:
- uses: actions/checkout@v2
# The containers that GitHub Actions use have Ansible installed, so upgrade to make sure we have the latest version.
- name: Upgrade ansible-core
run: python3 -m pip install --upgrade ansible-core
- name: Run sanity tests
run: make test_collection_sanity
env:
# needed due to cgroupsv2. This is fixed, but a stable release
# with the fix has not been made yet.
ANSIBLE_TEST_PREFER_PODMAN: 1

View File

@@ -1,7 +1,5 @@
---
name: Build/Push Development Images
env:
LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
on:
push:
branches:

View File

@@ -1,12 +1,9 @@
---
name: E2E Tests
env:
LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
on:
pull_request_target:
types: [labeled]
jobs:
jobs:
e2e-test:
if: contains(github.event.pull_request.labels.*.name, 'qe:e2e')
runs-on: ubuntu-latest
@@ -107,3 +104,5 @@ jobs:
with:
name: AWX-logs-${{ matrix.job }}
path: make-docker-compose-output.log

View File

@@ -1,26 +0,0 @@
---
name: Feature branch deletion cleanup
env:
LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
on:
delete:
branches:
- feature_**
jobs:
push:
runs-on: ubuntu-latest
permissions:
packages: write
contents: read
steps:
- name: Delete API Schema
env:
AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }}
AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }}
AWS_REGION: 'us-east-1'
run: |
ansible localhost -c local, -m command -a "{{ ansible_python_interpreter + ' -m pip install boto3'}}"
ansible localhost -c local -m aws_s3 \
-a "bucket=awx-public-ci-files object=${GITHUB_REF##*/}/schema.json mode=delete permission=public-read"

View File

@@ -13,13 +13,21 @@ jobs:
packages: write
contents: read
steps:
- name: Check for each of the lines
env:
PR_BODY: ${{ github.event.pull_request.body }}
- name: Write PR body to a file
run: |
echo "$PR_BODY" | grep "Bug, Docs Fix or other nominal change" > Z
echo "$PR_BODY" | grep "New or Enhanced Feature" > Y
echo "$PR_BODY" | grep "Breaking Change" > X
cat >> pr.body << __SOME_RANDOM_PR_EOF__
${{ github.event.pull_request.body }}
__SOME_RANDOM_PR_EOF__
- name: Display the received body for troubleshooting
run: cat pr.body
# We want to write these out individually just in case the options were joined on a single line
- name: Check for each of the lines
run: |
grep "Bug, Docs Fix or other nominal change" pr.body > Z
grep "New or Enhanced Feature" pr.body > Y
grep "Breaking Change" pr.body > X
exit 0
# We exit 0 and set the shell to prevent the returns from the greps from failing this step
# See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference

View File

@@ -1,9 +1,5 @@
---
name: Promote Release
env:
LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
on:
release:
types: [published]
@@ -38,13 +34,9 @@ jobs:
- name: Build collection and publish to galaxy
run: |
COLLECTION_TEMPLATE_VERSION=true COLLECTION_NAMESPACE=${{ env.collection_namespace }} make build_collection
if [ "$(curl --head -sw '%{http_code}' https://galaxy.ansible.com/download/${{ env.collection_namespace }}-awx-${{ github.event.release.tag_name }}.tar.gz | tail -1)" == "302" ] ; then \
echo "Galaxy release already done"; \
else \
ansible-galaxy collection publish \
--token=${{ secrets.GALAXY_TOKEN }} \
awx_collection_build/${{ env.collection_namespace }}-awx-${{ github.event.release.tag_name }}.tar.gz; \
fi
ansible-galaxy collection publish \
--token=${{ secrets.GALAXY_TOKEN }} \
awx_collection_build/${{ env.collection_namespace }}-awx-${{ github.event.release.tag_name }}.tar.gz
- name: Set official pypi info
run: echo pypi_repo=pypi >> $GITHUB_ENV
@@ -56,7 +48,6 @@ jobs:
- name: Build awxkit and upload to pypi
run: |
git reset --hard
cd awxkit && python3 setup.py bdist_wheel
twine upload \
-r ${{ env.pypi_repo }} \
@@ -79,6 +70,4 @@ jobs:
docker tag ghcr.io/${{ github.repository }}:${{ github.event.release.tag_name }} quay.io/${{ github.repository }}:latest
docker push quay.io/${{ github.repository }}:${{ github.event.release.tag_name }}
docker push quay.io/${{ github.repository }}:latest
docker pull ghcr.io/${{ github.repository_owner }}/awx-ee:${{ github.event.release.tag_name }}
docker tag ghcr.io/${{ github.repository_owner }}/awx-ee:${{ github.event.release.tag_name }} quay.io/${{ github.repository_owner }}/awx-ee:${{ github.event.release.tag_name }}
docker push quay.io/${{ github.repository_owner }}/awx-ee:${{ github.event.release.tag_name }}

View File

@@ -1,9 +1,5 @@
---
name: Stage Release
env:
LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
on:
workflow_dispatch:
inputs:
@@ -84,20 +80,6 @@ jobs:
-e push=yes \
-e awx_official=yes
- name: Log in to GHCR
run: |
echo ${{ secrets.GITHUB_TOKEN }} | docker login ghcr.io -u ${{ github.actor }} --password-stdin
- name: Log in to Quay
run: |
echo ${{ secrets.QUAY_TOKEN }} | docker login quay.io -u ${{ secrets.QUAY_USER }} --password-stdin
- name: tag awx-ee:latest with version input
run: |
docker pull quay.io/ansible/awx-ee:latest
docker tag quay.io/ansible/awx-ee:latest ghcr.io/${{ github.repository_owner }}/awx-ee:${{ github.event.inputs.version }}
docker push ghcr.io/${{ github.repository_owner }}/awx-ee:${{ github.event.inputs.version }}
- name: Build and stage awx-operator
working-directory: awx-operator
run: |
@@ -117,7 +99,6 @@ jobs:
env:
AWX_TEST_IMAGE: ${{ github.repository }}
AWX_TEST_VERSION: ${{ github.event.inputs.version }}
AWX_EE_TEST_IMAGE: ghcr.io/${{ github.repository_owner }}/awx-ee:${{ github.event.inputs.version }}
- name: Create draft release for AWX
working-directory: awx

View File

@@ -1,15 +1,10 @@
---
name: Upload API Schema
env:
LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
on:
push:
branches:
- devel
- release_**
- feature_**
jobs:
push:
runs-on: ubuntu-latest

View File

@@ -12,7 +12,7 @@ recursive-include awx/plugins *.ps1
recursive-include requirements *.txt
recursive-include requirements *.yml
recursive-include config *
recursive-include licenses *
recursive-include docs/licenses *
recursive-exclude awx devonly.py*
recursive-exclude awx/api/tests *
recursive-exclude awx/main/tests *

View File

@@ -6,20 +6,7 @@ CHROMIUM_BIN=/tmp/chrome-linux/chrome
GIT_BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD)
MANAGEMENT_COMMAND ?= awx-manage
VERSION := $(shell $(PYTHON) tools/scripts/scm_version.py)
# ansible-test requires a semver-compatible version, so we allow overrides to hack it
COLLECTION_VERSION ?= $(shell $(PYTHON) tools/scripts/scm_version.py | cut -d . -f 1-3)
# args for the ansible-test sanity command
COLLECTION_SANITY_ARGS ?= --docker
# collection unit testing directories
COLLECTION_TEST_DIRS ?= awx_collection/test/awx
# collection integration test directories (defaults to all)
COLLECTION_TEST_TARGET ?=
# args for collection install
COLLECTION_PACKAGE ?= awx
COLLECTION_NAMESPACE ?= awx
COLLECTION_INSTALL = ~/.ansible/collections/ansible_collections/$(COLLECTION_NAMESPACE)/$(COLLECTION_PACKAGE)
COLLECTION_TEMPLATE_VERSION ?= false
COLLECTION_VERSION := $(shell $(PYTHON) tools/scripts/scm_version.py | cut -d . -f 1-3)
# NOTE: This defaults the container image version to the branch that's active
COMPOSE_TAG ?= $(GIT_BRANCH)
@@ -47,7 +34,7 @@ RECEPTOR_IMAGE ?= quay.io/ansible/receptor:devel
SRC_ONLY_PKGS ?= cffi,pycparser,psycopg2,twilio
# These should be upgraded in the AWX and Ansible venv before attempting
# to install the actual requirements
VENV_BOOTSTRAP ?= pip==21.2.4 setuptools==65.6.3 setuptools_scm[toml]==7.0.5 wheel==0.38.4
VENV_BOOTSTRAP ?= pip==21.2.4 setuptools==58.2.0 setuptools_scm[toml]==6.4.2 wheel==0.36.2
NAME ?= awx
@@ -98,7 +85,6 @@ clean: clean-ui clean-api clean-awxkit clean-dist
clean-api:
rm -rf build $(NAME)-$(VERSION) *.egg-info
rm -rf .tox
find . -type f -regex ".*\.py[co]$$" -delete
find . -type d -name "__pycache__" -delete
rm -f awx/awx_test.sqlite3*
@@ -131,7 +117,7 @@ virtualenv_awx:
fi; \
fi
## Install third-party requirements needed for AWX's environment.
## Install third-party requirements needed for AWX's environment.
# this does not use system site packages intentionally
requirements_awx: virtualenv_awx
if [[ "$(PIP_OPTIONS)" == *"--no-index"* ]]; then \
@@ -195,7 +181,7 @@ collectstatic:
@if [ "$(VENV_BASE)" ]; then \
. $(VENV_BASE)/awx/bin/activate; \
fi; \
$(PYTHON) manage.py collectstatic --clear --noinput > /dev/null 2>&1
mkdir -p awx/public/static && $(PYTHON) manage.py collectstatic --clear --noinput > /dev/null 2>&1
DEV_RELOAD_COMMAND ?= supervisorctl restart tower-processes:*
@@ -301,13 +287,19 @@ test:
cd awxkit && $(VENV_BASE)/awx/bin/tox -re py3
awx-manage check_migrations --dry-run --check -n 'missing_migration_file'
COLLECTION_TEST_DIRS ?= awx_collection/test/awx
COLLECTION_TEST_TARGET ?=
COLLECTION_PACKAGE ?= awx
COLLECTION_NAMESPACE ?= awx
COLLECTION_INSTALL = ~/.ansible/collections/ansible_collections/$(COLLECTION_NAMESPACE)/$(COLLECTION_PACKAGE)
COLLECTION_TEMPLATE_VERSION ?= false
test_collection:
rm -f $(shell ls -d $(VENV_BASE)/awx/lib/python* | head -n 1)/no-global-site-packages.txt
if [ "$(VENV_BASE)" ]; then \
. $(VENV_BASE)/awx/bin/activate; \
fi && \
if ! [ -x "$(shell command -v ansible-playbook)" ]; then pip install ansible-core; fi
ansible --version
pip install ansible-core && \
py.test $(COLLECTION_TEST_DIRS) -v
# The python path needs to be modified so that the tests can find Ansible within the container
# First we will use anything explicitly set as PYTHONPATH
@@ -337,13 +329,8 @@ install_collection: build_collection
rm -rf $(COLLECTION_INSTALL)
ansible-galaxy collection install awx_collection_build/$(COLLECTION_NAMESPACE)-$(COLLECTION_PACKAGE)-$(COLLECTION_VERSION).tar.gz
test_collection_sanity:
rm -rf awx_collection_build/
rm -rf $(COLLECTION_INSTALL)
if ! [ -x "$(shell command -v ansible-test)" ]; then pip install ansible-core; fi
ansible --version
COLLECTION_VERSION=1.0.0 make install_collection
cd $(COLLECTION_INSTALL) && ansible-test sanity $(COLLECTION_SANITY_ARGS)
test_collection_sanity: install_collection
cd $(COLLECTION_INSTALL) && ansible-test sanity
test_collection_integration: install_collection
cd $(COLLECTION_INSTALL) && ansible-test integration $(COLLECTION_TEST_TARGET)
@@ -390,8 +377,6 @@ clean-ui:
rm -rf awx/ui/build
rm -rf awx/ui/src/locales/_build
rm -rf $(UI_BUILD_FLAG_FILE)
# the collectstatic command doesn't like it if this dir doesn't exist.
mkdir -p awx/ui/build/static
awx/ui/node_modules:
NODE_OPTIONS=--max-old-space-size=6144 $(NPM_BIN) --prefix awx/ui --loglevel warn --force ci
@@ -401,18 +386,20 @@ $(UI_BUILD_FLAG_FILE):
$(PYTHON) tools/scripts/compilemessages.py
$(NPM_BIN) --prefix awx/ui --loglevel warn run compile-strings
$(NPM_BIN) --prefix awx/ui --loglevel warn run build
mkdir -p awx/public/static/css
mkdir -p awx/public/static/js
mkdir -p awx/public/static/media
cp -r awx/ui/build/static/css/* awx/public/static/css
cp -r awx/ui/build/static/js/* awx/public/static/js
cp -r awx/ui/build/static/media/* awx/public/static/media
touch $@
ui-release: $(UI_BUILD_FLAG_FILE)
ui-devel: awx/ui/node_modules
@$(MAKE) -B $(UI_BUILD_FLAG_FILE)
mkdir -p /var/lib/awx/public/static/css
mkdir -p /var/lib/awx/public/static/js
mkdir -p /var/lib/awx/public/static/media
cp -r awx/ui/build/static/css/* /var/lib/awx/public/static/css
cp -r awx/ui/build/static/js/* /var/lib/awx/public/static/js
cp -r awx/ui/build/static/media/* /var/lib/awx/public/static/media
ui-devel-instrumented: awx/ui/node_modules
$(NPM_BIN) --prefix awx/ui --loglevel warn run start-instrumented
@@ -464,9 +451,8 @@ awx/projects:
COMPOSE_UP_OPTS ?=
COMPOSE_OPTS ?=
CONTROL_PLANE_NODE_COUNT ?= 1
EXECUTION_NODE_COUNT ?= 0
EXECUTION_NODE_COUNT ?= 2
MINIKUBE_CONTAINER_GROUP ?= false
MINIKUBE_SETUP ?= false # if false, run minikube separately
EXTRA_SOURCES_ANSIBLE_OPTS ?=
ifneq ($(ADMIN_PASSWORD),)
@@ -475,7 +461,7 @@ endif
docker-compose-sources: .git/hooks/pre-commit
@if [ $(MINIKUBE_CONTAINER_GROUP) = true ]; then\
ansible-playbook -i tools/docker-compose/inventory -e minikube_setup=$(MINIKUBE_SETUP) tools/docker-compose-minikube/deploy.yml; \
ansible-playbook -i tools/docker-compose/inventory tools/docker-compose-minikube/deploy.yml; \
fi;
ansible-playbook -i tools/docker-compose/inventory tools/docker-compose/ansible/sources.yml \
@@ -605,12 +591,13 @@ pot: $(UI_BUILD_FLAG_FILE)
po: $(UI_BUILD_FLAG_FILE)
$(NPM_BIN) --prefix awx/ui --loglevel warn run extract-strings -- --clean
LANG = "en_us"
## generate API django .pot .po
messages:
@if [ "$(VENV_BASE)" ]; then \
. $(VENV_BASE)/awx/bin/activate; \
fi; \
$(PYTHON) manage.py makemessages -l en_us --keep-pot
$(PYTHON) manage.py makemessages -l $(LANG) --keep-pot
print-%:
@echo $($*)
@@ -648,4 +635,4 @@ help/generate:
} \
} \
{ lastLine = $$0 }' $(MAKEFILE_LIST) | sort -u
@printf "\n"
@printf "\n"

View File

@@ -96,15 +96,6 @@ register(
category=_('Authentication'),
category_slug='authentication',
)
register(
'ALLOW_METRICS_FOR_ANONYMOUS_USERS',
field_class=fields.BooleanField,
default=False,
label=_('Allow anonymous users to poll metrics'),
help_text=_('If true, anonymous users are allowed to poll metrics.'),
category=_('Authentication'),
category_slug='authentication',
)
def authentication_validate(serializer, attrs):

View File

@@ -6,6 +6,7 @@ import inspect
import logging
import time
import uuid
import urllib.parse
# Django
from django.conf import settings
@@ -13,7 +14,7 @@ from django.contrib.auth import views as auth_views
from django.contrib.contenttypes.models import ContentType
from django.core.cache import cache
from django.core.exceptions import FieldDoesNotExist
from django.db import connection, transaction
from django.db import connection
from django.db.models.fields.related import OneToOneRel
from django.http import QueryDict
from django.shortcuts import get_object_or_404
@@ -29,7 +30,7 @@ from rest_framework.response import Response
from rest_framework import status
from rest_framework import views
from rest_framework.permissions import AllowAny
from rest_framework.renderers import StaticHTMLRenderer
from rest_framework.renderers import StaticHTMLRenderer, JSONRenderer
from rest_framework.negotiation import DefaultContentNegotiation
# AWX
@@ -40,7 +41,7 @@ from awx.main.utils import camelcase_to_underscore, get_search_fields, getattrd,
from awx.main.utils.db import get_all_field_names
from awx.main.utils.licensing import server_product_name
from awx.main.views import ApiErrorView
from awx.api.serializers import ResourceAccessListElementSerializer, CopySerializer
from awx.api.serializers import ResourceAccessListElementSerializer, CopySerializer, UserSerializer
from awx.api.versioning import URLPathVersioning
from awx.api.metadata import SublistAttachDetatchMetadata, Metadata
from awx.conf import settings_registry
@@ -64,7 +65,6 @@ __all__ = [
'ParentMixin',
'SubListAttachDetachAPIView',
'CopyAPIView',
'GenericCancelView',
'BaseUsersList',
]
@@ -90,9 +90,13 @@ class LoggedLoginView(auth_views.LoginView):
def post(self, request, *args, **kwargs):
ret = super(LoggedLoginView, self).post(request, *args, **kwargs)
current_user = getattr(request, 'user', None)
if request.user.is_authenticated:
logger.info(smart_str(u"User {} logged in from {}".format(self.request.user.username, request.META.get('REMOTE_ADDR', None))))
ret.set_cookie('userLoggedIn', 'true')
current_user = UserSerializer(self.request.user)
current_user = smart_str(JSONRenderer().render(current_user.data))
current_user = urllib.parse.quote('%s' % current_user, '')
ret.setdefault('X-API-Session-Cookie-Name', getattr(settings, 'SESSION_COOKIE_NAME', 'awx_sessionid'))
return ret
@@ -249,7 +253,7 @@ class APIView(views.APIView):
response['X-API-Query-Time'] = '%0.3fs' % sum(q_times)
if getattr(self, 'deprecated', False):
response['Warning'] = '299 awx "This resource has been deprecated and will be removed in a future release."'
response['Warning'] = '299 awx "This resource has been deprecated and will be removed in a future release."' # noqa
return response
@@ -986,23 +990,6 @@ class CopyAPIView(GenericAPIView):
return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)
class GenericCancelView(RetrieveAPIView):
# In subclass set model, serializer_class
obj_permission_type = 'cancel'
@transaction.non_atomic_requests
def dispatch(self, *args, **kwargs):
return super(GenericCancelView, self).dispatch(*args, **kwargs)
def post(self, request, *args, **kwargs):
obj = self.get_object()
if obj.can_cancel:
obj.cancel()
return Response(status=status.HTTP_202_ACCEPTED)
else:
return self.http_method_not_allowed(request, *args, **kwargs)
class BaseUsersList(SubListCreateAttachDetachAPIView):
def post(self, request, *args, **kwargs):
ret = super(BaseUsersList, self).post(request, *args, **kwargs)

View File

@@ -24,6 +24,7 @@ __all__ = [
'InventoryInventorySourcesUpdatePermission',
'UserPermission',
'IsSystemAdminOrAuditor',
'InstanceGroupTowerPermission',
'WorkflowApprovalPermission',
]

View File

@@ -29,7 +29,6 @@ from django.utils.translation import gettext_lazy as _
from django.utils.encoding import force_str
from django.utils.text import capfirst
from django.utils.timezone import now
from django.core.validators import RegexValidator, MaxLengthValidator
# Django REST Framework
from rest_framework.exceptions import ValidationError, PermissionDenied
@@ -113,7 +112,7 @@ from awx.main.utils import (
)
from awx.main.utils.filters import SmartFilter
from awx.main.utils.named_url_graph import reset_counters
from awx.main.scheduler.task_manager_models import TaskManagerModels
from awx.main.scheduler.task_manager_models import TaskManagerInstanceGroups, TaskManagerInstances
from awx.main.redact import UriCleaner, REPLACE_STR
from awx.main.validators import vars_validate_or_raise
@@ -121,9 +120,6 @@ from awx.main.validators import vars_validate_or_raise
from awx.api.versioning import reverse
from awx.api.fields import BooleanNullField, CharNullField, ChoiceNullField, VerbatimField, DeprecatedCredentialField
# AWX Utils
from awx.api.validators import HostnameRegexValidator
logger = logging.getLogger('awx.api.serializers')
# Fields that should be summarized regardless of object type.
@@ -2221,15 +2217,6 @@ class InventorySourceUpdateSerializer(InventorySourceSerializer):
class Meta:
fields = ('can_update',)
def validate(self, attrs):
project = self.instance.source_project
if project:
failed_reason = project.get_reason_if_failed()
if failed_reason:
raise serializers.ValidationError(failed_reason)
return super(InventorySourceUpdateSerializer, self).validate(attrs)
class InventoryUpdateSerializer(UnifiedJobSerializer, InventorySourceOptionsSerializer):
@@ -3759,11 +3746,7 @@ class LaunchConfigurationBaseSerializer(BaseSerializer):
# Build unsaved version of this config, use it to detect prompts errors
mock_obj = self._build_mock_obj(attrs)
if set(list(ujt.get_ask_mapping().keys()) + ['extra_data']) & set(attrs.keys()):
accepted, rejected, errors = ujt._accept_or_ignore_job_kwargs(_exclude_errors=self.exclude_errors, **mock_obj.prompts_dict())
else:
# Only perform validation of prompts if prompts fields are provided
errors = {}
accepted, rejected, errors = ujt._accept_or_ignore_job_kwargs(_exclude_errors=self.exclude_errors, **mock_obj.prompts_dict())
# Remove all unprocessed $encrypted$ strings, indicating default usage
if 'extra_data' in attrs and password_dict:
@@ -4281,10 +4264,17 @@ class JobLaunchSerializer(BaseSerializer):
# Basic validation - cannot run a playbook without a playbook
if not template.project:
errors['project'] = _("A project is required to run a job.")
else:
failure_reason = template.project.get_reason_if_failed()
if failure_reason:
errors['playbook'] = failure_reason
elif template.project.status in ('error', 'failed'):
errors['playbook'] = _("Missing a revision to run due to failed project update.")
latest_update = template.project.project_updates.last()
if latest_update is not None and latest_update.failed:
failed_validation_tasks = latest_update.project_update_events.filter(
event='runner_on_failed',
play="Perform project signature/checksum verification",
)
if failed_validation_tasks:
errors['playbook'] = _("Last project update failed due to signature validation failure.")
# cannot run a playbook without an inventory
if template.inventory and template.inventory.pending_deletion is True:
@@ -4931,19 +4921,6 @@ class InstanceSerializer(BaseSerializer):
extra_kwargs = {
'node_type': {'initial': Instance.Types.EXECUTION, 'default': Instance.Types.EXECUTION},
'node_state': {'initial': Instance.States.INSTALLED, 'default': Instance.States.INSTALLED},
'hostname': {
'validators': [
MaxLengthValidator(limit_value=250),
validators.UniqueValidator(queryset=Instance.objects.all()),
RegexValidator(
regex=r'^localhost$|^127(?:\.[0-9]+){0,2}\.[0-9]+$|^(?:0*\:)*?:?0*1$',
flags=re.IGNORECASE,
inverse_match=True,
message="hostname cannot be localhost or 127.0.0.1",
),
HostnameRegexValidator(),
],
},
}
def get_related(self, obj):
@@ -4954,7 +4931,7 @@ class InstanceSerializer(BaseSerializer):
res['install_bundle'] = self.reverse('api:instance_install_bundle', kwargs={'pk': obj.pk})
res['peers'] = self.reverse('api:instance_peers_list', kwargs={"pk": obj.pk})
if self.context['request'].user.is_superuser or self.context['request'].user.is_system_auditor:
if obj.node_type == 'execution':
if obj.node_type != 'hop':
res['health_check'] = self.reverse('api:instance_health_check', kwargs={'pk': obj.pk})
return res
@@ -5014,10 +4991,6 @@ class InstanceSerializer(BaseSerializer):
return value
def validate_hostname(self, value):
"""
- Hostname cannot be "localhost" - but can be something like localhost.domain
- Cannot change the hostname of an-already instantiated & initialized Instance object
"""
if self.instance and self.instance.hostname != value:
raise serializers.ValidationError("Cannot change hostname.")
@@ -5040,10 +5013,12 @@ class InstanceHealthCheckSerializer(BaseSerializer):
class InstanceGroupSerializer(BaseSerializer):
show_capabilities = ['edit', 'delete']
capacity = serializers.SerializerMethodField()
consumed_capacity = serializers.SerializerMethodField()
percent_capacity_remaining = serializers.SerializerMethodField()
jobs_running = serializers.SerializerMethodField()
jobs_running = serializers.IntegerField(
help_text=_('Count of jobs in the running or waiting state that ' 'are targeted for this instance group'), read_only=True
)
jobs_total = serializers.IntegerField(help_text=_('Count of all jobs that target this instance group'), read_only=True)
instances = serializers.SerializerMethodField()
is_container_group = serializers.BooleanField(
@@ -5069,22 +5044,6 @@ class InstanceGroupSerializer(BaseSerializer):
label=_('Policy Instance Minimum'),
help_text=_("Static minimum number of Instances that will be automatically assign to " "this group when new instances come online."),
)
max_concurrent_jobs = serializers.IntegerField(
default=0,
min_value=0,
required=False,
initial=0,
label=_('Max Concurrent Jobs'),
help_text=_("Maximum number of concurrent jobs to run on a group. When set to zero, no maximum is enforced."),
)
max_forks = serializers.IntegerField(
default=0,
min_value=0,
required=False,
initial=0,
label=_('Max Forks'),
help_text=_("Maximum number of forks to execute concurrently on a group. When set to zero, no maximum is enforced."),
)
policy_instance_list = serializers.ListField(
child=serializers.CharField(),
required=False,
@@ -5106,8 +5065,6 @@ class InstanceGroupSerializer(BaseSerializer):
"consumed_capacity",
"percent_capacity_remaining",
"jobs_running",
"max_concurrent_jobs",
"max_forks",
"jobs_total",
"instances",
"is_container_group",
@@ -5189,39 +5146,28 @@ class InstanceGroupSerializer(BaseSerializer):
# Store capacity values (globally computed) in the context
if 'task_manager_igs' not in self.context:
instance_groups_queryset = None
jobs_qs = UnifiedJob.objects.filter(status__in=('running', 'waiting'))
if self.parent: # Is ListView:
instance_groups_queryset = self.parent.instance
tm_models = TaskManagerModels.init_with_consumed_capacity(
instance_fields=['uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'enabled'],
instance_groups_queryset=instance_groups_queryset,
)
instances = TaskManagerInstances(jobs_qs)
instance_groups = TaskManagerInstanceGroups(instances_by_hostname=instances, instance_groups_queryset=instance_groups_queryset)
self.context['task_manager_igs'] = tm_models.instance_groups
self.context['task_manager_igs'] = instance_groups
return self.context['task_manager_igs']
def get_consumed_capacity(self, obj):
ig_mgr = self.get_ig_mgr()
return ig_mgr.get_consumed_capacity(obj.name)
def get_capacity(self, obj):
ig_mgr = self.get_ig_mgr()
return ig_mgr.get_capacity(obj.name)
def get_percent_capacity_remaining(self, obj):
capacity = self.get_capacity(obj)
if not capacity:
if not obj.capacity:
return 0.0
consumed_capacity = self.get_consumed_capacity(obj)
return float("{0:.2f}".format(((float(capacity) - float(consumed_capacity)) / (float(capacity))) * 100))
ig_mgr = self.get_ig_mgr()
return float("{0:.2f}".format((float(ig_mgr.get_remaining_capacity(obj.name)) / (float(obj.capacity))) * 100))
def get_instances(self, obj):
ig_mgr = self.get_ig_mgr()
return len(ig_mgr.get_instances(obj.name))
def get_jobs_running(self, obj):
ig_mgr = self.get_ig_mgr()
return ig_mgr.get_jobs_running(obj.name)
return obj.instances.count()
class ActivityStreamSerializer(BaseSerializer):

View File

@@ -1,5 +1,5 @@
Launch a Job Template:
{% ifmeth GET %}
Make a GET request to this resource to determine if the job_template can be
launched and whether any passwords are required to launch the job_template.
The response will include the following fields:
@@ -29,8 +29,8 @@ The response will include the following fields:
* `inventory_needed_to_start`: Flag indicating the presence of an inventory
associated with the job template. If not then one should be supplied when
launching the job (boolean, read-only)
{% endifmeth %}
{% ifmeth POST %}Make a POST request to this resource to launch the job_template. If any
Make a POST request to this resource to launch the job_template. If any
passwords, inventory, or extra variables (extra_vars) are required, they must
be passed via POST data, with extra_vars given as a YAML or JSON string and
escaped parentheses. If the `inventory_needed_to_start` is `True` then the
@@ -41,4 +41,3 @@ are not provided, a 400 status code will be returned. If the job cannot be
launched, a 405 status code will be returned. If the provided credential or
inventory are not allowed to be used by the user, then a 403 status code will
be returned.
{% endifmeth %}

View File

@@ -1,5 +1,3 @@
receptor_user: awx
receptor_group: awx
receptor_verify: true
receptor_tls: true
receptor_work_commands:
@@ -12,12 +10,12 @@ custom_worksign_public_keyfile: receptor/work-public-key.pem
custom_tls_certfile: receptor/tls/receptor.crt
custom_tls_keyfile: receptor/tls/receptor.key
custom_ca_certfile: receptor/tls/ca/receptor-ca.crt
receptor_user: awx
receptor_group: awx
receptor_protocol: 'tcp'
receptor_listener: true
receptor_port: {{ instance.listener_port }}
receptor_dependencies:
- podman
- crun
- python39-pip
{% verbatim %}
podman_user: "{{ receptor_user }}"
podman_group: "{{ receptor_group }}"
{% endverbatim %}

View File

@@ -9,12 +9,10 @@
shell: /bin/bash
- name: Enable Copr repo for Receptor
command: dnf copr enable ansible-awx/receptor -y
- import_role:
name: ansible.receptor.podman
- import_role:
name: ansible.receptor.setup
- name: Install ansible-runner
pip:
name: ansible-runner
executable: pip3.9
{% endverbatim %}
{% endverbatim %}

View File

@@ -1,4 +1,6 @@
---
collections:
- name: ansible.receptor
version: 1.1.0
source: https://github.com/ansible/receptor-collection/
type: git
version: 0.1.1

View File

@@ -9,9 +9,9 @@ from awx.api.views import (
InstanceUnifiedJobsList,
InstanceInstanceGroupsList,
InstanceHealthCheck,
InstanceInstallBundle,
InstancePeersList,
)
from awx.api.views.instance_install_bundle import InstanceInstallBundle
urls = [

View File

@@ -3,28 +3,26 @@
from django.urls import re_path
from awx.api.views.inventory import (
from awx.api.views import (
InventoryList,
InventoryDetail,
InventoryHostsList,
InventoryGroupsList,
InventoryRootGroupsList,
InventoryVariableData,
InventoryScriptView,
InventoryTreeView,
InventoryInventorySourcesList,
InventoryInventorySourcesUpdate,
InventoryActivityStreamList,
InventoryJobTemplateList,
InventoryAdHocCommandsList,
InventoryAccessList,
InventoryObjectRolesList,
InventoryInstanceGroupsList,
InventoryLabelList,
InventoryCopy,
)
from awx.api.views import (
InventoryHostsList,
InventoryGroupsList,
InventoryInventorySourcesList,
InventoryInventorySourcesUpdate,
InventoryAdHocCommandsList,
InventoryRootGroupsList,
InventoryScriptView,
InventoryTreeView,
InventoryVariableData,
)
urls = [

View File

@@ -3,9 +3,6 @@
from django.urls import re_path
from awx.api.views.inventory import (
InventoryUpdateEventsList,
)
from awx.api.views import (
InventoryUpdateList,
InventoryUpdateDetail,
@@ -13,6 +10,7 @@ from awx.api.views import (
InventoryUpdateStdout,
InventoryUpdateNotificationsList,
InventoryUpdateCredentialsList,
InventoryUpdateEventsList,
)

View File

@@ -10,7 +10,7 @@ from oauthlib import oauth2
from oauth2_provider import views
from awx.main.models import RefreshToken
from awx.api.views.root import ApiOAuthAuthorizationRootView
from awx.api.views import ApiOAuthAuthorizationRootView
class TokenView(views.TokenView):

View File

@@ -3,7 +3,7 @@
from django.urls import re_path
from awx.api.views.organization import (
from awx.api.views import (
OrganizationList,
OrganizationDetail,
OrganizationUsersList,
@@ -14,6 +14,7 @@ from awx.api.views.organization import (
OrganizationJobTemplatesList,
OrganizationWorkflowJobTemplatesList,
OrganizationTeamsList,
OrganizationCredentialList,
OrganizationActivityStreamList,
OrganizationNotificationTemplatesList,
OrganizationNotificationTemplatesErrorList,
@@ -24,8 +25,8 @@ from awx.api.views.organization import (
OrganizationGalaxyCredentialsList,
OrganizationObjectRolesList,
OrganizationAccessList,
OrganizationApplicationList,
)
from awx.api.views import OrganizationCredentialList, OrganizationApplicationList
urls = [

View File

@@ -6,15 +6,13 @@ from django.urls import include, re_path
from awx import MODE
from awx.api.generics import LoggedLoginView, LoggedLogoutView
from awx.api.views.root import (
from awx.api.views import (
ApiRootView,
ApiV2RootView,
ApiV2PingView,
ApiV2ConfigView,
ApiV2SubscriptionView,
ApiV2AttachView,
)
from awx.api.views import (
AuthView,
UserMeList,
DashboardView,
@@ -30,8 +28,8 @@ from awx.api.views import (
OAuth2TokenList,
ApplicationOAuth2TokenList,
OAuth2ApplicationDetail,
MeshVisualizer,
)
from awx.api.views.mesh_visualizer import MeshVisualizer
from awx.api.views.metrics import MetricsView

View File

@@ -1,6 +1,6 @@
from django.urls import re_path
from awx.api.views.webhooks import WebhookKeyView, GithubWebhookReceiver, GitlabWebhookReceiver
from awx.api.views import WebhookKeyView, GithubWebhookReceiver, GitlabWebhookReceiver
urlpatterns = [

View File

@@ -1,55 +0,0 @@
import re
from django.core.validators import RegexValidator, validate_ipv46_address
from django.core.exceptions import ValidationError
class HostnameRegexValidator(RegexValidator):
    """
    Validate that a value is an IP address or a syntactically legal hostname.

    Accepts an IP address or hostname as the argument. A value passes when it is:
    - an IPv4 or IPv6 address accepted by ``validate_ipv46_address``, or
    - a simple hostname: a letter or digit followed by letters, digits or hyphens, or
    - an FQDN: dot-separated labels of 1-63 characters, each starting with a
      letter or digit, whose final label is at least 2 characters long.

    Consequently a hostname cannot start with a hyphen, cannot begin or end
    with a ".", and cannot contain whitespace of any kind (spaces, tabs,
    newlines, ...). Matching is case-insensitive.

    NOTE(review): ``regex`` does not enforce the 255-character overall limit,
    and its first alternative puts no upper bound on a dot-less name, so such
    names also escape the 63-character label limit.
    NOTE(review): the previous docstring cited RFC 9210; hostname syntax is
    normally attributed to RFC 952 / RFC 1123 -- confirm the intended reference.
    """

    regex = '^[a-z0-9][-a-z0-9]*$|^([a-z0-9][-a-z0-9]{0,62}[.])*[a-z0-9][-a-z0-9]{1,62}$'
    flags = re.IGNORECASE

    def __call__(self, value):
        """
        Validate ``value`` and raise ``ValidationError`` when it is not acceptable.

        Honors ``inverse_match``: when it is set, a value that *passes* the
        hostname/IP checks is the one treated as invalid.
        """
        regex_matches, err = self.__validate(value)
        invalid_input = regex_matches if self.inverse_match else not regex_matches
        if invalid_input:
            # Prefer the specific error produced by __validate; fall back to
            # the validator's generic message when there is none (e.g. an
            # inverse match on an otherwise valid value).
            if err is None:
                err = ValidationError(self.message, code=self.code, params={"value": value})
            raise err

    def __str__(self):
        return f"regex={self.regex}, message={self.message}, code={self.code}, inverse_match={self.inverse_match}, flags={self.flags}"

    def __validate(self, value):
        """
        Check ``value`` against the whitespace, IP-address and hostname rules.

        Returns a ``(matches, error)`` tuple: ``(True, None)`` when the value
        is a valid IP address or hostname, otherwise ``(False, ValidationError)``.
        """
        # Reject every kind of whitespace, not only the space character.
        # This also closes a gap in the regex below: with ``match``, ``$``
        # succeeds just before a trailing newline, so "host\n" used to pass.
        if any(ch.isspace() for ch in value):
            return False, ValidationError("whitespaces in hostnames are illegal")

        # If we have an IP address, try and validate it.
        try:
            validate_ipv46_address(value)
            return True, None
        except ValidationError:
            pass

        # By this point in the code, we probably have a simple hostname, FQDN
        # or a strange hostname like "192.localhost.domain.101".
        if not self.regex.match(value):
            return False, ValidationError(f"illegal characters detected in hostname={value}. Please verify.")

        return True, None

View File

@@ -5,7 +5,6 @@
import dateutil
import functools
import html
import itertools
import logging
import re
import requests
@@ -21,10 +20,9 @@ from urllib3.exceptions import ConnectTimeoutError
# Django
from django.conf import settings
from django.core.exceptions import FieldError, ObjectDoesNotExist
from django.db.models import Q, Sum, Count
from django.db.models import Q, Sum
from django.db import IntegrityError, ProgrammingError, transaction, connection
from django.db.models.fields.related import ManyToManyField, ForeignKey
from django.db.models.functions import Trunc
from django.shortcuts import get_object_or_404
from django.utils.safestring import mark_safe
from django.utils.timezone import now
@@ -49,6 +47,9 @@ from rest_framework import status
from rest_framework_yaml.parsers import YAMLParser
from rest_framework_yaml.renderers import YAMLRenderer
# QSStats
import qsstats
# ANSIConv
import ansiconv
@@ -68,7 +69,6 @@ from awx.api.generics import (
APIView,
BaseUsersList,
CopyAPIView,
GenericCancelView,
GenericAPIView,
ListAPIView,
ListCreateAPIView,
@@ -122,6 +122,56 @@ from awx.api.views.mixin import (
UnifiedJobDeletionMixin,
NoTruncateMixin,
)
from awx.api.views.instance_install_bundle import InstanceInstallBundle # noqa
from awx.api.views.inventory import ( # noqa
InventoryList,
InventoryDetail,
InventoryUpdateEventsList,
InventoryList,
InventoryDetail,
InventoryActivityStreamList,
InventoryInstanceGroupsList,
InventoryAccessList,
InventoryObjectRolesList,
InventoryJobTemplateList,
InventoryLabelList,
InventoryCopy,
)
from awx.api.views.mesh_visualizer import MeshVisualizer # noqa
from awx.api.views.organization import ( # noqa
OrganizationList,
OrganizationDetail,
OrganizationInventoriesList,
OrganizationUsersList,
OrganizationAdminsList,
OrganizationExecutionEnvironmentsList,
OrganizationProjectsList,
OrganizationJobTemplatesList,
OrganizationWorkflowJobTemplatesList,
OrganizationTeamsList,
OrganizationActivityStreamList,
OrganizationNotificationTemplatesList,
OrganizationNotificationTemplatesAnyList,
OrganizationNotificationTemplatesErrorList,
OrganizationNotificationTemplatesStartedList,
OrganizationNotificationTemplatesSuccessList,
OrganizationNotificationTemplatesApprovalList,
OrganizationInstanceGroupsList,
OrganizationGalaxyCredentialsList,
OrganizationAccessList,
OrganizationObjectRolesList,
)
from awx.api.views.root import ( # noqa
ApiRootView,
ApiOAuthAuthorizationRootView,
ApiVersionRootView,
ApiV2RootView,
ApiV2PingView,
ApiV2ConfigView,
ApiV2SubscriptionView,
ApiV2AttachView,
)
from awx.api.views.webhooks import WebhookKeyView, GithubWebhookReceiver, GitlabWebhookReceiver # noqa
from awx.api.pagination import UnifiedJobEventPagination
from awx.main.utils import set_environ
@@ -282,50 +332,30 @@ class DashboardJobsGraphView(APIView):
success_query = success_query.filter(instance_of=models.ProjectUpdate)
failed_query = failed_query.filter(instance_of=models.ProjectUpdate)
end = now()
interval = 'day'
success_qss = qsstats.QuerySetStats(success_query, 'finished')
failed_qss = qsstats.QuerySetStats(failed_query, 'finished')
start_date = now()
if period == 'month':
start = end - dateutil.relativedelta.relativedelta(months=1)
end_date = start_date - dateutil.relativedelta.relativedelta(months=1)
interval = 'days'
elif period == 'two_weeks':
start = end - dateutil.relativedelta.relativedelta(weeks=2)
end_date = start_date - dateutil.relativedelta.relativedelta(weeks=2)
interval = 'days'
elif period == 'week':
start = end - dateutil.relativedelta.relativedelta(weeks=1)
end_date = start_date - dateutil.relativedelta.relativedelta(weeks=1)
interval = 'days'
elif period == 'day':
start = end - dateutil.relativedelta.relativedelta(days=1)
interval = 'hour'
end_date = start_date - dateutil.relativedelta.relativedelta(days=1)
interval = 'hours'
else:
return Response({'error': _('Unknown period "%s"') % str(period)}, status=status.HTTP_400_BAD_REQUEST)
dashboard_data = {"jobs": {"successful": [], "failed": []}}
succ_list = dashboard_data['jobs']['successful']
fail_list = dashboard_data['jobs']['failed']
qs_s = (
success_query.filter(finished__range=(start, end))
.annotate(d=Trunc('finished', interval, tzinfo=end.tzinfo))
.order_by()
.values('d')
.annotate(agg=Count('id', distinct=True))
)
data_s = {item['d']: item['agg'] for item in qs_s}
qs_f = (
failed_query.filter(finished__range=(start, end))
.annotate(d=Trunc('finished', interval, tzinfo=end.tzinfo))
.order_by()
.values('d')
.annotate(agg=Count('id', distinct=True))
)
data_f = {item['d']: item['agg'] for item in qs_f}
start_date = start.replace(hour=0, minute=0, second=0, microsecond=0)
for d in itertools.count():
date = start_date + dateutil.relativedelta.relativedelta(days=d)
if date > end:
break
succ_list.append([time.mktime(date.timetuple()), data_s.get(date, 0)])
fail_list.append([time.mktime(date.timetuple()), data_f.get(date, 0)])
for element in success_qss.time_series(end_date, start_date, interval=interval):
dashboard_data['jobs']['successful'].append([time.mktime(element[0].timetuple()), element[1]])
for element in failed_qss.time_series(end_date, start_date, interval=interval):
dashboard_data['jobs']['failed'].append([time.mktime(element[0].timetuple()), element[1]])
return Response(dashboard_data)
@@ -344,13 +374,6 @@ class InstanceDetail(RetrieveUpdateAPIView):
model = models.Instance
serializer_class = serializers.InstanceSerializer
def update_raw_data(self, data):
    """Drop creation-only fields from ``data``, then defer to the parent implementation."""
    # These fields are only valid when an instance is created, so they are
    # unwanted on the detail view.
    for creation_only_field in ('listener_port', 'node_type', 'hostname'):
        data.pop(creation_only_field, None)
    return super(InstanceDetail, self).update_raw_data(data)
def update(self, request, *args, **kwargs):
r = super(InstanceDetail, self).update(request, *args, **kwargs)
if status.is_success(r.status_code):
@@ -418,8 +441,8 @@ class InstanceHealthCheck(GenericAPIView):
permission_classes = (IsSystemAdminOrAuditor,)
def get_queryset(self):
return super().get_queryset().filter(node_type='execution')
# FIXME: For now, we don't have a good way of checking the health of a hop node.
return super().get_queryset().exclude(node_type='hop')
def get(self, request, *args, **kwargs):
obj = self.get_object()
@@ -439,10 +462,9 @@ class InstanceHealthCheck(GenericAPIView):
execution_node_health_check.apply_async([obj.hostname])
else:
return Response(
{"error": f"Cannot run a health check on instances of type {obj.node_type}. Health checks can only be run on execution nodes."},
status=status.HTTP_400_BAD_REQUEST,
)
from awx.main.tasks.system import cluster_node_health_check
cluster_node_health_check.apply_async([obj.hostname], queue=obj.hostname)
return Response({'msg': f"Health check is running for {obj.hostname}."}, status=status.HTTP_200_OK)
@@ -1004,11 +1026,20 @@ class SystemJobEventsList(SubListAPIView):
return job.get_event_queryset()
class ProjectUpdateCancel(GenericCancelView):
class ProjectUpdateCancel(RetrieveAPIView):
model = models.ProjectUpdate
obj_permission_type = 'cancel'
serializer_class = serializers.ProjectUpdateCancelSerializer
def post(self, request, *args, **kwargs):
    """
    Cancel the object returned by ``get_object()``.

    Responds with 202 Accepted after calling ``cancel()`` when the object
    reports ``can_cancel``; otherwise delegates to ``http_method_not_allowed``.
    """
    target = self.get_object()
    # Guard clause: nothing to cancel, so answer as "method not allowed".
    if not target.can_cancel:
        return self.http_method_not_allowed(request, *args, **kwargs)
    target.cancel()
    return Response(status=status.HTTP_202_ACCEPTED)
class ProjectUpdateNotificationsList(SubListAPIView):
@@ -2247,8 +2278,6 @@ class InventorySourceUpdateView(RetrieveAPIView):
def post(self, request, *args, **kwargs):
obj = self.get_object()
serializer = self.get_serializer(instance=obj, data=request.data)
serializer.is_valid(raise_exception=True)
if obj.can_update:
update = obj.update()
if not update:
@@ -2283,11 +2312,20 @@ class InventoryUpdateCredentialsList(SubListAPIView):
relationship = 'credentials'
class InventoryUpdateCancel(GenericCancelView):
class InventoryUpdateCancel(RetrieveAPIView):
model = models.InventoryUpdate
obj_permission_type = 'cancel'
serializer_class = serializers.InventoryUpdateCancelSerializer
def post(self, request, *args, **kwargs):
    """
    Cancel the object returned by ``get_object()``.

    Responds with 202 Accepted after calling ``cancel()`` when the object
    reports ``can_cancel``; otherwise delegates to ``http_method_not_allowed``.
    """
    target = self.get_object()
    # Guard clause: nothing to cancel, so answer as "method not allowed".
    if not target.can_cancel:
        return self.http_method_not_allowed(request, *args, **kwargs)
    target.cancel()
    return Response(status=status.HTTP_202_ACCEPTED)
class InventoryUpdateNotificationsList(SubListAPIView):
@@ -3062,7 +3100,8 @@ class WorkflowJobNodeChildrenBaseList(SubListAPIView):
search_fields = ('unified_job_template__name', 'unified_job_template__description')
#
# Limit the set of WorkflowJobNodes to the related nodes of specified by self.relationship
# Limit the set of WorkflowJobNodes to the related nodes specified by
# 'relationship'
#
def get_queryset(self):
parent = self.get_parent_object()
@@ -3364,15 +3403,20 @@ class WorkflowJobWorkflowNodesList(SubListAPIView):
return super(WorkflowJobWorkflowNodesList, self).get_queryset().order_by('id')
class WorkflowJobCancel(GenericCancelView):
class WorkflowJobCancel(RetrieveAPIView):
model = models.WorkflowJob
obj_permission_type = 'cancel'
serializer_class = serializers.WorkflowJobCancelSerializer
def post(self, request, *args, **kwargs):
r = super().post(request, *args, **kwargs)
ScheduleWorkflowManager().schedule()
return r
obj = self.get_object()
if obj.can_cancel:
obj.cancel()
ScheduleWorkflowManager().schedule()
return Response(status=status.HTTP_202_ACCEPTED)
else:
return self.http_method_not_allowed(request, *args, **kwargs)
class WorkflowJobNotificationsList(SubListAPIView):
@@ -3528,11 +3572,20 @@ class JobActivityStreamList(SubListAPIView):
search_fields = ('changes',)
class JobCancel(GenericCancelView):
class JobCancel(RetrieveAPIView):
model = models.Job
obj_permission_type = 'cancel'
serializer_class = serializers.JobCancelSerializer
def post(self, request, *args, **kwargs):
    """
    Cancel the object returned by ``get_object()``.

    Responds with 202 Accepted after calling ``cancel()`` when the object
    reports ``can_cancel``; otherwise delegates to ``http_method_not_allowed``.
    """
    target = self.get_object()
    # Guard clause: nothing to cancel, so answer as "method not allowed".
    if not target.can_cancel:
        return self.http_method_not_allowed(request, *args, **kwargs)
    target.cancel()
    return Response(status=status.HTTP_202_ACCEPTED)
class JobRelaunch(RetrieveAPIView):
@@ -4003,11 +4056,20 @@ class AdHocCommandDetail(UnifiedJobDeletionMixin, RetrieveDestroyAPIView):
serializer_class = serializers.AdHocCommandDetailSerializer
class AdHocCommandCancel(GenericCancelView):
class AdHocCommandCancel(RetrieveAPIView):
model = models.AdHocCommand
obj_permission_type = 'cancel'
serializer_class = serializers.AdHocCommandCancelSerializer
def post(self, request, *args, **kwargs):
    """
    Cancel the object returned by ``get_object()``.

    Responds with 202 Accepted after calling ``cancel()`` when the object
    reports ``can_cancel``; otherwise delegates to ``http_method_not_allowed``.
    """
    target = self.get_object()
    # Guard clause: nothing to cancel, so answer as "method not allowed".
    if not target.can_cancel:
        return self.http_method_not_allowed(request, *args, **kwargs)
    target.cancel()
    return Response(status=status.HTTP_202_ACCEPTED)
class AdHocCommandRelaunch(GenericAPIView):
@@ -4142,11 +4204,20 @@ class SystemJobDetail(UnifiedJobDeletionMixin, RetrieveDestroyAPIView):
serializer_class = serializers.SystemJobSerializer
class SystemJobCancel(GenericCancelView):
class SystemJobCancel(RetrieveAPIView):
model = models.SystemJob
obj_permission_type = 'cancel'
serializer_class = serializers.SystemJobCancelSerializer
def post(self, request, *args, **kwargs):
    """
    Cancel the object returned by ``get_object()``.

    Responds with 202 Accepted after calling ``cancel()`` when the object
    reports ``can_cancel``; otherwise delegates to ``http_method_not_allowed``.
    """
    target = self.get_object()
    # Guard clause: nothing to cancel, so answer as "method not allowed".
    if not target.can_cancel:
        return self.http_method_not_allowed(request, *args, **kwargs)
    target.cancel()
    return Response(status=status.HTTP_202_ACCEPTED)
class SystemJobNotificationsList(SubListAPIView):

View File

@@ -178,7 +178,7 @@ def generate_receptor_tls(instance_obj):
.public_key(csr.public_key())
.serial_number(x509.random_serial_number())
.not_valid_before(datetime.datetime.utcnow())
.not_valid_after(datetime.datetime.utcnow() + datetime.timedelta(days=3650))
.not_valid_after(datetime.datetime.utcnow() + datetime.timedelta(days=10))
.add_extension(
csr.extensions.get_extension_for_class(x509.SubjectAlternativeName).value,
critical=csr.extensions.get_extension_for_class(x509.SubjectAlternativeName).critical,

View File

@@ -5,11 +5,9 @@
import logging
# Django
from django.conf import settings
from django.utils.translation import gettext_lazy as _
# Django REST Framework
from rest_framework.permissions import AllowAny
from rest_framework.response import Response
from rest_framework.exceptions import PermissionDenied
@@ -33,14 +31,9 @@ class MetricsView(APIView):
renderer_classes = [renderers.PlainTextRenderer, renderers.PrometheusJSONRenderer, renderers.BrowsableAPIRenderer]
def initialize_request(self, request, *args, **kwargs):
    """
    Relax this view's permissions when anonymous metrics access is enabled.

    When ``settings.ALLOW_METRICS_FOR_ANONYMOUS_USERS`` is truthy,
    ``permission_classes`` is replaced with ``(AllowAny,)`` on this view
    instance before the request is initialized.
    """
    anonymous_allowed = settings.ALLOW_METRICS_FOR_ANONYMOUS_USERS
    if anonymous_allowed:
        self.permission_classes = (AllowAny,)
    # NOTE(review): super(APIView, self) starts the method lookup *after*
    # APIView in the MRO, so any initialize_request defined on APIView itself
    # is bypassed -- confirm this is intended.
    return super(APIView, self).initialize_request(request, *args, **kwargs)
def get(self, request):
'''Show Metrics Details'''
if settings.ALLOW_METRICS_FOR_ANONYMOUS_USERS or request.user.is_superuser or request.user.is_system_auditor:
if request.user.is_superuser or request.user.is_system_auditor:
metrics_to_show = ''
if not request.query_params.get('subsystemonly', "0") == "1":
metrics_to_show += metrics().decode('UTF-8')

View File

@@ -16,7 +16,7 @@ from rest_framework import status
from awx.main.constants import ACTIVE_STATES
from awx.main.utils import get_object_or_400
from awx.main.models.ha import Instance, InstanceGroup, schedule_policy_task
from awx.main.models.ha import Instance, InstanceGroup
from awx.main.models.organization import Team
from awx.main.models.projects import Project
from awx.main.models.inventory import Inventory
@@ -107,11 +107,6 @@ class InstanceGroupMembershipMixin(object):
if inst_name in ig_obj.policy_instance_list:
ig_obj.policy_instance_list.pop(ig_obj.policy_instance_list.index(inst_name))
ig_obj.save(update_fields=['policy_instance_list'])
# sometimes removing an instance has a non-obvious consequence
# this is almost always true if policy_instance_percentage or _minimum is non-zero
# after removing a single instance, the other memberships need to be re-balanced
schedule_policy_task()
return response

View File

@@ -6237,5 +6237,4 @@ msgstr "%s se está actualizando."
#: awx/ui/urls.py:24
msgid "This page will refresh when complete."
msgstr "Esta página se actualizará cuando se complete."
msgstr "Esta página se actualizará cuando se complete."

View File

@@ -721,7 +721,7 @@ msgstr "DTSTART valide obligatoire dans rrule. La valeur doit commencer par : DT
#: awx/api/serializers.py:4657
msgid ""
"DTSTART cannot be a naive datetime. Specify ;TZINFO= or YYYYMMDDTHHMMSSZZ."
msgstr "DTSTART ne peut correspondre à une date-heure naïve. Spécifier ;TZINFO= ou YYYYMMDDTHHMMSSZZ."
msgstr "DTSTART ne peut correspondre à une DateHeure naïve. Spécifier ;TZINFO= ou YYYYMMDDTHHMMSSZZ."
#: awx/api/serializers.py:4659
msgid "Multiple DTSTART is not supported."
@@ -6239,5 +6239,4 @@ msgstr "%s est en cours de mise à niveau."
#: awx/ui/urls.py:24
msgid "This page will refresh when complete."
msgstr "Cette page sera rafraîchie une fois terminée."
msgstr "Cette page sera rafraîchie une fois terminée."

View File

@@ -6237,5 +6237,4 @@ msgstr "Er wordt momenteel een upgrade van%s geïnstalleerd."
#: awx/ui/urls.py:24
msgid "This page will refresh when complete."
msgstr "Deze pagina wordt vernieuwd als hij klaar is."
msgstr "Deze pagina wordt vernieuwd als hij klaar is."

View File

@@ -993,6 +993,9 @@ class HostAccess(BaseAccess):
if data and 'name' in data:
self.check_license(add_host_name=data['name'])
# Check the per-org limit
self.check_org_host_limit({'inventory': obj.inventory}, add_host_name=data['name'])
# Checks for admin or change permission on inventory, controls whether
# the user can edit variable data.
return obj and self.user in obj.inventory.admin_role
@@ -2697,66 +2700,46 @@ class ActivityStreamAccess(BaseAccess):
# 'job_template', 'job', 'project', 'project_update', 'workflow_job',
# 'inventory_source', 'workflow_job_template'
q = Q(user=self.user)
inventory_set = Inventory.accessible_pk_qs(self.user, 'read_role')
if inventory_set:
q |= (
Q(ad_hoc_command__inventory__in=inventory_set)
| Q(inventory__in=inventory_set)
| Q(host__inventory__in=inventory_set)
| Q(group__inventory__in=inventory_set)
| Q(inventory_source__inventory__in=inventory_set)
| Q(inventory_update__inventory_source__inventory__in=inventory_set)
)
credential_set = Credential.accessible_pk_qs(self.user, 'read_role')
if credential_set:
q |= Q(credential__in=credential_set)
inventory_set = Inventory.accessible_objects(self.user, 'read_role')
credential_set = Credential.accessible_objects(self.user, 'read_role')
auditing_orgs = (
(Organization.accessible_objects(self.user, 'admin_role') | Organization.accessible_objects(self.user, 'auditor_role'))
.distinct()
.values_list('id', flat=True)
)
if auditing_orgs:
q |= (
Q(user__in=auditing_orgs.values('member_role__members'))
| Q(organization__in=auditing_orgs)
| Q(notification_template__organization__in=auditing_orgs)
| Q(notification__notification_template__organization__in=auditing_orgs)
| Q(label__organization__in=auditing_orgs)
| Q(role__in=Role.objects.filter(ancestors__in=self.user.roles.all()) if auditing_orgs else [])
)
project_set = Project.accessible_pk_qs(self.user, 'read_role')
if project_set:
q |= Q(project__in=project_set) | Q(project_update__project__in=project_set)
jt_set = JobTemplate.accessible_pk_qs(self.user, 'read_role')
if jt_set:
q |= Q(job_template__in=jt_set) | Q(job__job_template__in=jt_set)
wfjt_set = WorkflowJobTemplate.accessible_pk_qs(self.user, 'read_role')
if wfjt_set:
q |= (
Q(workflow_job_template__in=wfjt_set)
| Q(workflow_job_template_node__workflow_job_template__in=wfjt_set)
| Q(workflow_job__workflow_job_template__in=wfjt_set)
)
team_set = Team.accessible_pk_qs(self.user, 'read_role')
if team_set:
q |= Q(team__in=team_set)
project_set = Project.accessible_objects(self.user, 'read_role')
jt_set = JobTemplate.accessible_objects(self.user, 'read_role')
team_set = Team.accessible_objects(self.user, 'read_role')
wfjt_set = WorkflowJobTemplate.accessible_objects(self.user, 'read_role')
app_set = OAuth2ApplicationAccess(self.user).filtered_queryset()
if app_set:
q |= Q(o_auth2_application__in=app_set)
token_set = OAuth2TokenAccess(self.user).filtered_queryset()
if token_set:
q |= Q(o_auth2_access_token__in=token_set)
return qs.filter(q).distinct()
return qs.filter(
Q(ad_hoc_command__inventory__in=inventory_set)
| Q(o_auth2_application__in=app_set)
| Q(o_auth2_access_token__in=token_set)
| Q(user__in=auditing_orgs.values('member_role__members'))
| Q(user=self.user)
| Q(organization__in=auditing_orgs)
| Q(inventory__in=inventory_set)
| Q(host__inventory__in=inventory_set)
| Q(group__inventory__in=inventory_set)
| Q(inventory_source__inventory__in=inventory_set)
| Q(inventory_update__inventory_source__inventory__in=inventory_set)
| Q(credential__in=credential_set)
| Q(team__in=team_set)
| Q(project__in=project_set)
| Q(project_update__project__in=project_set)
| Q(job_template__in=jt_set)
| Q(job__job_template__in=jt_set)
| Q(workflow_job_template__in=wfjt_set)
| Q(workflow_job_template_node__workflow_job_template__in=wfjt_set)
| Q(workflow_job__workflow_job_template__in=wfjt_set)
| Q(notification_template__organization__in=auditing_orgs)
| Q(notification__notification_template__organization__in=auditing_orgs)
| Q(label__organization__in=auditing_orgs)
| Q(role__in=Role.objects.filter(ancestors__in=self.user.roles.all()) if auditing_orgs else [])
).distinct()
def can_add(self, data):
return False

View File

@@ -1,8 +1,8 @@
import datetime
import asyncio
import logging
import aioredis
import redis
import redis.asyncio
import re
from prometheus_client import (
@@ -82,7 +82,7 @@ class BroadcastWebsocketStatsManager:
async def run_loop(self):
try:
redis_conn = await redis.asyncio.Redis.from_url(settings.BROKER_URL)
redis_conn = await aioredis.create_redis_pool(settings.BROKER_URL)
while True:
stats_data_str = ''.join(stat.serialize() for stat in self._stats.values())
await redis_conn.set(self._redis_key, stats_data_str)
@@ -122,8 +122,8 @@ class BroadcastWebsocketStats:
'Number of messages received, to be forwarded, by the broadcast websocket system',
registry=self._registry,
)
self._messages_received_current_conn = Gauge(
f'awx_{self.remote_name}_messages_received_currrent_conn',
self._messages_received = Gauge(
f'awx_{self.remote_name}_messages_received',
'Number forwarded messages received by the broadcast websocket system, for the duration of the current connection',
registry=self._registry,
)
@@ -144,13 +144,13 @@ class BroadcastWebsocketStats:
def record_message_received(self):
self._internal_messages_received_per_minute.record()
self._messages_received_current_conn.inc()
self._messages_received.inc()
self._messages_received_total.inc()
def record_connection_established(self):
self._connection.state('connected')
self._connection_start.set_to_current_time()
self._messages_received_current_conn.set(0)
self._messages_received.set(0)
def record_connection_lost(self):
self._connection.state('disconnected')

View File

@@ -16,7 +16,7 @@ from awx.conf.license import get_license
from awx.main.utils import get_awx_version, camelcase_to_underscore, datetime_hook
from awx.main import models
from awx.main.analytics import register
from awx.main.scheduler.task_manager_models import TaskManagerModels
from awx.main.scheduler.task_manager_models import TaskManagerInstances
"""
This module is used to define metrics collected by awx.main.analytics.gather()
@@ -237,8 +237,9 @@ def projects_by_scm_type(since, **kwargs):
def instance_info(since, include_hostnames=False, **kwargs):
info = {}
# Use same method that the TaskManager does to compute consumed capacity without querying all running jobs for each Instance
tm_models = TaskManagerModels.init_with_consumed_capacity(instance_fields=['uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'enabled'])
for tm_instance in tm_models.instances.instances_by_hostname.values():
active_tasks = models.UnifiedJob.objects.filter(status__in=['running', 'waiting']).only('task_impact', 'controller_node', 'execution_node')
tm_instances = TaskManagerInstances(active_tasks, instance_fields=['uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'enabled'])
for tm_instance in tm_instances.instances_by_hostname.values():
instance = tm_instance.obj
instance_info = {
'uuid': instance.uuid,
@@ -250,7 +251,6 @@ def instance_info(since, include_hostnames=False, **kwargs):
'enabled': instance.enabled,
'consumed_capacity': tm_instance.consumed_capacity,
'remaining_capacity': instance.capacity - tm_instance.consumed_capacity,
'node_type': instance.node_type,
}
if include_hostnames is True:
instance_info['hostname'] = instance.hostname

View File

@@ -57,7 +57,6 @@ def metrics():
[
'hostname',
'instance_uuid',
'node_type',
],
registry=REGISTRY,
)
@@ -85,7 +84,6 @@ def metrics():
[
'hostname',
'instance_uuid',
'node_type',
],
registry=REGISTRY,
)
@@ -113,7 +111,6 @@ def metrics():
[
'hostname',
'instance_uuid',
'node_type',
],
registry=REGISTRY,
)
@@ -123,7 +120,6 @@ def metrics():
[
'hostname',
'instance_uuid',
'node_type',
],
registry=REGISTRY,
)
@@ -184,13 +180,12 @@ def metrics():
instance_data = instance_info(None, include_hostnames=True)
for uuid, info in instance_data.items():
hostname = info['hostname']
node_type = info['node_type']
INSTANCE_CAPACITY.labels(hostname=hostname, instance_uuid=uuid, node_type=node_type).set(instance_data[uuid]['capacity'])
INSTANCE_CAPACITY.labels(hostname=hostname, instance_uuid=uuid).set(instance_data[uuid]['capacity'])
INSTANCE_CPU.labels(hostname=hostname, instance_uuid=uuid).set(instance_data[uuid]['cpu'])
INSTANCE_MEMORY.labels(hostname=hostname, instance_uuid=uuid).set(instance_data[uuid]['memory'])
INSTANCE_CONSUMED_CAPACITY.labels(hostname=hostname, instance_uuid=uuid, node_type=node_type).set(instance_data[uuid]['consumed_capacity'])
INSTANCE_REMAINING_CAPACITY.labels(hostname=hostname, instance_uuid=uuid, node_type=node_type).set(instance_data[uuid]['remaining_capacity'])
INSTANCE_INFO.labels(hostname=hostname, instance_uuid=uuid, node_type=node_type).info(
INSTANCE_CONSUMED_CAPACITY.labels(hostname=hostname, instance_uuid=uuid).set(instance_data[uuid]['consumed_capacity'])
INSTANCE_REMAINING_CAPACITY.labels(hostname=hostname, instance_uuid=uuid).set(instance_data[uuid]['remaining_capacity'])
INSTANCE_INFO.labels(hostname=hostname, instance_uuid=uuid).info(
{
'enabled': str(instance_data[uuid]['enabled']),
'managed_by_policy': str(instance_data[uuid]['managed_by_policy']),

View File

@@ -5,9 +5,7 @@ import logging
from django.conf import settings
from django.apps import apps
from awx.main.consumers import emit_channel_notification
from awx.main.utils import is_testing
root_key = 'awx_metrics'
logger = logging.getLogger('awx.main.analytics')
@@ -165,10 +163,14 @@ class Metrics:
Instance = apps.get_model('main', 'Instance')
if instance_name:
self.instance_name = instance_name
elif is_testing():
elif settings.IS_TESTING():
self.instance_name = "awx_testing"
else:
self.instance_name = Instance.objects.my_hostname()
try:
self.instance_name = Instance.objects.me().hostname
except Exception as e:
self.instance_name = settings.CLUSTER_HOST_ID
logger.info(f'Instance {self.instance_name} seems to be unregistered, error: {e}')
# metric name, help_text
METRICSLIST = [

View File

@@ -569,7 +569,7 @@ register(
register(
'LOG_AGGREGATOR_LOGGERS',
field_class=fields.StringListField,
default=['awx', 'activity_stream', 'job_events', 'system_tracking', 'broadcast_websocket'],
default=['awx', 'activity_stream', 'job_events', 'system_tracking'],
label=_('Loggers Sending Data to Log Aggregator Form'),
help_text=_(
'List of loggers that will send HTTP logs to the collector, these can '
@@ -577,8 +577,7 @@ register(
'awx - service logs\n'
'activity_stream - activity stream records\n'
'job_events - callback data from Ansible job events\n'
'system_tracking - facts gathered from scan jobs\n'
'broadcast_websocket - errors pertaining to websockets broadcast metrics\n'
'system_tracking - facts gathered from scan jobs.'
),
category=_('Logging'),
category_slug='logging',

View File

@@ -9,16 +9,10 @@ aim_inputs = {
'fields': [
{
'id': 'url',
'label': _('CyberArk CCP URL'),
'label': _('CyberArk AIM URL'),
'type': 'string',
'format': 'url',
},
{
'id': 'webservice_id',
'label': _('Web Service ID'),
'type': 'string',
'help_text': _('The CCP Web Service ID. Leave blank to default to AIMWebService.'),
},
{
'id': 'app_id',
'label': _('Application ID'),
@@ -70,13 +64,10 @@ def aim_backend(**kwargs):
client_cert = kwargs.get('client_cert', None)
client_key = kwargs.get('client_key', None)
verify = kwargs['verify']
webservice_id = kwargs['webservice_id']
app_id = kwargs['app_id']
object_query = kwargs['object_query']
object_query_format = kwargs['object_query_format']
reason = kwargs.get('reason', None)
if webservice_id == '':
webservice_id = 'AIMWebService'
query_params = {
'AppId': app_id,
@@ -87,7 +78,7 @@ def aim_backend(**kwargs):
query_params['reason'] = reason
request_qs = '?' + urlencode(query_params, quote_via=quote)
request_url = urljoin(url, '/'.join([webservice_id, 'api', 'Accounts']))
request_url = urljoin(url, '/'.join(['AIMWebService', 'api', 'Accounts']))
with CertFiles(client_cert, client_key) as cert:
res = requests.get(
@@ -101,4 +92,4 @@ def aim_backend(**kwargs):
return res.json()['Content']
aim_plugin = CredentialPlugin('CyberArk Central Credential Provider Lookup', inputs=aim_inputs, backend=aim_backend)
aim_plugin = CredentialPlugin('CyberArk AIM Central Credential Provider Lookup', inputs=aim_inputs, backend=aim_backend)

View File

@@ -1,5 +1,6 @@
from .plugin import CredentialPlugin, CertFiles, raise_for_status
import base64
from urllib.parse import urljoin, quote
from django.utils.translation import gettext_lazy as _
@@ -60,7 +61,7 @@ def conjur_backend(**kwargs):
cacert = kwargs.get('cacert', None)
auth_kwargs = {
'headers': {'Content-Type': 'text/plain', 'Accept-Encoding': 'base64'},
'headers': {'Content-Type': 'text/plain'},
'data': api_key,
'allow_redirects': False,
}
@@ -68,9 +69,9 @@ def conjur_backend(**kwargs):
with CertFiles(cacert) as cert:
# https://www.conjur.org/api.html#authentication-authenticate-post
auth_kwargs['verify'] = cert
resp = requests.post(urljoin(url, '/'.join(['api', 'authn', account, username, 'authenticate'])), **auth_kwargs)
resp = requests.post(urljoin(url, '/'.join(['authn', account, username, 'authenticate'])), **auth_kwargs)
raise_for_status(resp)
token = resp.content.decode('utf-8')
token = base64.b64encode(resp.content).decode('utf-8')
lookup_kwargs = {
'headers': {'Authorization': 'Token token="{}"'.format(token)},
@@ -78,10 +79,9 @@ def conjur_backend(**kwargs):
}
# https://www.conjur.org/api.html#secrets-retrieve-a-secret-get
path = urljoin(url, '/'.join(['api', 'secrets', account, 'variable', secret_path]))
path = urljoin(url, '/'.join(['secrets', account, 'variable', secret_path]))
if version:
ver = "version={}".format(version)
path = '?'.join([path, ver])
path = '?'.join([path, version])
with CertFiles(cacert) as cert:
lookup_kwargs['verify'] = cert
@@ -90,4 +90,4 @@ def conjur_backend(**kwargs):
return resp.text
conjur_plugin = CredentialPlugin('CyberArk Conjur Secrets Manager Lookup', inputs=conjur_inputs, backend=conjur_backend)
conjur_plugin = CredentialPlugin('CyberArk Conjur Secret Lookup', inputs=conjur_inputs, backend=conjur_backend)

View File

@@ -1,7 +1,6 @@
import copy
import os
import pathlib
import time
from urllib.parse import urljoin
from .plugin import CredentialPlugin, CertFiles, raise_for_status
@@ -248,15 +247,7 @@ def kv_backend(**kwargs):
request_url = urljoin(url, '/'.join(['v1'] + path_segments)).rstrip('/')
with CertFiles(cacert) as cert:
request_kwargs['verify'] = cert
request_retries = 0
while request_retries < 5:
response = sess.get(request_url, **request_kwargs)
# https://developer.hashicorp.com/vault/docs/enterprise/consistency
if response.status_code == 412:
request_retries += 1
time.sleep(1)
else:
break
response = sess.get(request_url, **request_kwargs)
raise_for_status(response)
json = response.json()
@@ -298,15 +289,8 @@ def ssh_backend(**kwargs):
with CertFiles(cacert) as cert:
request_kwargs['verify'] = cert
request_retries = 0
while request_retries < 5:
resp = sess.post(request_url, **request_kwargs)
# https://developer.hashicorp.com/vault/docs/enterprise/consistency
if resp.status_code == 412:
request_retries += 1
time.sleep(1)
else:
break
resp = sess.post(request_url, **request_kwargs)
raise_for_status(resp)
return resp.json()['data']['signed_key']

View File

@@ -3,7 +3,6 @@ import uuid
import json
from django.conf import settings
from django.db import connection
import redis
from awx.main.dispatch import get_local_queuename
@@ -50,10 +49,7 @@ class Control(object):
reply_queue = Control.generate_reply_queue_name()
self.result = None
if not connection.get_autocommit():
raise RuntimeError('Control-with-reply messages can only be done in autocommit mode')
with pg_bus_conn() as conn:
with pg_bus_conn(new_connection=True) as conn:
conn.listen(reply_queue)
send_data = {'control': command, 'reply_to': reply_queue}
if extra_data:

View File

@@ -387,8 +387,6 @@ class AutoscalePool(WorkerPool):
reaper.reap_job(j, 'failed')
except Exception:
logger.exception('failed to reap job UUID {}'.format(w.current_task['uuid']))
else:
logger.warning(f'Worker was told to quit but has not, pid={w.pid}')
orphaned.extend(w.orphaned_tasks)
self.workers.remove(w)
elif w.idle and len(self.workers) > self.min_workers:
@@ -452,6 +450,9 @@ class AutoscalePool(WorkerPool):
try:
if isinstance(body, dict) and body.get('bind_kwargs'):
self.add_bind_kwargs(body)
# when the cluster heartbeat occurs, clean up internally
if isinstance(body, dict) and 'cluster_node_heartbeat' in body['task']:
self.cleanup()
if self.should_grow:
self.up()
# we don't care about "preferred queue" round robin distribution, just
@@ -466,7 +467,7 @@ class AutoscalePool(WorkerPool):
task_name = 'unknown'
if isinstance(body, dict):
task_name = body.get('task')
logger.warning(f'Workers maxed, queuing {task_name}, load: {sum(len(w.managed_tasks) for w in self.workers)} / {len(self.workers)}')
logger.warn(f'Workers maxed, queuing {task_name}, load: {sum(len(w.managed_tasks) for w in self.workers)} / {len(self.workers)}')
return super(AutoscalePool, self).write(preferred_queue, body)
except Exception:
for conn in connections.all():

View File

@@ -1,13 +1,14 @@
import inspect
import logging
import sys
import json
import time
from uuid import uuid4
from django.conf import settings
from django_guid import get_guid
from . import pg_bus_conn
from awx.main.utils import is_testing
logger = logging.getLogger('awx.main.dispatch')
@@ -92,7 +93,7 @@ class task:
obj.update(**kw)
if callable(queue):
queue = queue()
if not is_testing():
if not settings.IS_TESTING(sys.argv):
with pg_bus_conn() as conn:
conn.notify(queue, json.dumps(obj))
return (obj, queue)

View File

@@ -16,7 +16,12 @@ def startup_reaping():
If this particular instance is starting, then we know that any running jobs are invalid
so we will reap those jobs as a special action here
"""
jobs = UnifiedJob.objects.filter(status='running', controller_node=Instance.objects.my_hostname())
try:
me = Instance.objects.me()
except RuntimeError as e:
logger.warning(f'Local instance is not registered, not running startup reaper: {e}')
return
jobs = UnifiedJob.objects.filter(status='running', controller_node=me.hostname)
job_ids = []
for j in jobs:
job_ids.append(j.id)
@@ -57,13 +62,16 @@ def reap_waiting(instance=None, status='failed', job_explanation=None, grace_per
if grace_period is None:
grace_period = settings.JOB_WAITING_GRACE_PERIOD + settings.TASK_MANAGER_TIMEOUT
if instance is None:
hostname = Instance.objects.my_hostname()
else:
hostname = instance.hostname
me = instance
if me is None:
try:
me = Instance.objects.me()
except RuntimeError as e:
logger.warning(f'Local instance is not registered, not running reaper: {e}')
return
if ref_time is None:
ref_time = tz_now()
jobs = UnifiedJob.objects.filter(status='waiting', modified__lte=ref_time - timedelta(seconds=grace_period), controller_node=hostname)
jobs = UnifiedJob.objects.filter(status='waiting', modified__lte=ref_time - timedelta(seconds=grace_period), controller_node=me.hostname)
if excluded_uuids:
jobs = jobs.exclude(celery_task_id__in=excluded_uuids)
for j in jobs:
@@ -74,13 +82,16 @@ def reap(instance=None, status='failed', job_explanation=None, excluded_uuids=No
"""
Reap all jobs in running for this instance.
"""
if instance is None:
hostname = Instance.objects.my_hostname()
else:
hostname = instance.hostname
me = instance
if me is None:
try:
me = Instance.objects.me()
except RuntimeError as e:
logger.warning(f'Local instance is not registered, not running reaper: {e}')
return
workflow_ctype_id = ContentType.objects.get_for_model(WorkflowJob).id
jobs = UnifiedJob.objects.filter(
Q(status='running') & (Q(execution_node=hostname) | Q(controller_node=hostname)) & ~Q(polymorphic_ctype_id=workflow_ctype_id)
Q(status='running') & (Q(execution_node=me.hostname) | Q(controller_node=me.hostname)) & ~Q(polymorphic_ctype_id=workflow_ctype_id)
)
if excluded_uuids:
jobs = jobs.exclude(celery_task_id__in=excluded_uuids)

View File

@@ -114,6 +114,7 @@ class AWXConsumerBase(object):
queue = 0
self.pool.write(queue, body)
self.total_messages += 1
self.record_statistics()
@log_excess_runtime(logger)
def record_statistics(self):
@@ -155,16 +156,6 @@ class AWXConsumerPG(AWXConsumerBase):
# if no successful loops have ran since startup, then we should fail right away
self.pg_is_down = True # set so that we fail if we get database errors on startup
self.pg_down_time = time.time() - self.pg_max_wait # allow no grace period
self.last_cleanup = time.time()
def run_periodic_tasks(self):
    """Run the housekeeping that must happen on every pass of the consumer loop.

    Statistics are recorded on every call; pool cleanup only runs once more
    than 60 seconds have passed since ``self.last_cleanup``.
    """
    # record_statistics maintains its own time buffer, so calling it each pass is fine
    self.record_statistics()

    seconds_since_cleanup = time.time() - self.last_cleanup
    if seconds_since_cleanup <= 60:  # same period as cluster_node_heartbeat
        return

    # NOTE: if we run out of database connections, it is important to still run cleanup
    # so that we scale down workers and free up connections
    self.pool.cleanup()
    self.last_cleanup = time.time()
def run(self, *args, **kwargs):
super(AWXConsumerPG, self).run(*args, **kwargs)
@@ -180,10 +171,8 @@ class AWXConsumerPG(AWXConsumerBase):
if init is False:
self.worker.on_start()
init = True
for e in conn.events(yield_timeouts=True):
if e is not None:
self.process_task(json.loads(e.payload))
self.run_periodic_tasks()
for e in conn.events():
self.process_task(json.loads(e.payload))
self.pg_is_down = False
if self.should_stop:
return
@@ -240,8 +229,6 @@ class BaseWorker(object):
# so we can establish a new connection
conn.close_if_unusable_or_obsolete()
self.perform_work(body, *args)
except Exception:
logger.exception(f'Unhandled exception in perform_work in worker pid={os.getpid()}')
finally:
if 'uuid' in body:
uuid = body['uuid']

View File

@@ -3,12 +3,14 @@ import logging
import os
import signal
import time
import traceback
import datetime
from django.conf import settings
from django.utils.functional import cached_property
from django.utils.timezone import now as tz_now
from django.db import transaction, connection as django_connection
from django.db import DatabaseError, OperationalError, transaction, connection as django_connection
from django.db.utils import InterfaceError, InternalError
from django_guid import set_guid
import psutil
@@ -62,7 +64,6 @@ class CallbackBrokerWorker(BaseWorker):
"""
MAX_RETRIES = 2
INDIVIDUAL_EVENT_RETRIES = 3
last_stats = time.time()
last_flush = time.time()
total = 0
@@ -163,48 +164,38 @@ class CallbackBrokerWorker(BaseWorker):
else: # only calculate the seconds if the created time already has been set
metrics_total_job_event_processing_seconds += e.modified - e.created
metrics_duration_to_save = time.perf_counter()
saved_events = []
try:
cls.objects.bulk_create(events)
metrics_bulk_events_saved += len(events)
saved_events = events
self.buff[cls] = []
except Exception as exc:
# If the database is flaking, let ensure_connection throw a general exception,
# which will be caught by the outer loop that goes into a proper sleep-and-retry cycle
django_connection.ensure_connection()
logger.warning(f'Error in events bulk_create, will try indiviually, error: {str(exc)}')
logger.warning(f'Error in events bulk_create, will try indiviually up to 5 errors, error {str(exc)}')
# if an exception occurs, we should re-attempt to save the
# events one-by-one, because something in the list is
# broken/stale
consecutive_errors = 0
events_saved = 0
metrics_events_batch_save_errors += 1
for e in events.copy():
for e in events:
try:
e.save()
metrics_singular_events_saved += 1
events.remove(e)
saved_events.append(e) # Importantly, remove successfully saved events from the buffer
events_saved += 1
consecutive_errors = 0
except Exception as exc_indv:
retry_count = getattr(e, '_retry_count', 0) + 1
e._retry_count = retry_count
# special sanitization logic for postgres treatment of NUL 0x00 char
if (retry_count == 1) and isinstance(exc_indv, ValueError) and ("\x00" in e.stdout):
e.stdout = e.stdout.replace("\x00", "")
if retry_count >= self.INDIVIDUAL_EVENT_RETRIES:
logger.error(f'Hit max retries ({retry_count}) saving individual Event error: {str(exc_indv)}\ndata:\n{e.__dict__}')
events.remove(e)
else:
logger.info(f'Database Error Saving individual Event uuid={e.uuid} try={retry_count}, error: {str(exc_indv)}')
consecutive_errors += 1
logger.info(f'Database Error Saving individual Job Event, error {str(exc_indv)}')
if consecutive_errors >= 5:
raise
metrics_singular_events_saved += events_saved
if events_saved == 0:
raise
metrics_duration_to_save = time.perf_counter() - metrics_duration_to_save
for e in saved_events:
for e in events:
if not getattr(e, '_skip_websocket_message', False):
metrics_events_broadcast += 1
emit_event_detail(e)
if getattr(e, '_notification_trigger_event', False):
job_stats_wrapup(getattr(e, e.JOB_REFERENCE), event=e)
self.buff = {}
self.last_flush = time.time()
# only update metrics if we saved events
if (metrics_bulk_events_saved + metrics_singular_events_saved) > 0:
@@ -276,16 +267,20 @@ class CallbackBrokerWorker(BaseWorker):
try:
self.flush(force=flush)
break
except Exception as exc:
# Aside from bugs, exceptions here are assumed to be due to database flake
except (OperationalError, InterfaceError, InternalError) as exc:
if retries >= self.MAX_RETRIES:
logger.exception('Worker could not re-establish database connectivity, giving up on one or more events.')
self.buff = {}
return
delay = 60 * retries
logger.warning(f'Database Error Flushing Job Events, retry #{retries + 1} in {delay} seconds: {str(exc)}')
django_connection.close()
time.sleep(delay)
retries += 1
except Exception:
logger.exception(f'Callback Task Processor Raised Unexpected Exception processing event data:\n{body}')
except DatabaseError:
logger.exception('Database Error Flushing Job Events')
django_connection.close()
break
except Exception as exc:
tb = traceback.format_exc()
logger.error('Callback Task Processor Raised Exception: %r', exc)
logger.error('Detail: {}'.format(tb))

View File

@@ -25,7 +25,7 @@ class Command(BaseCommand):
with connection.cursor() as cursor:
cursor.execute(
f'''
SELECT
SELECT
b.id, b.job_id, b.host_name, b.created - a.created delta,
b.task task,
b.event_data::json->'task_action' task_action,

View File

@@ -38,14 +38,7 @@ class Command(BaseCommand):
(changed, instance) = Instance.objects.register(ip_address=os.environ.get('MY_POD_IP'), node_type='control', uuid=settings.SYSTEM_UUID)
RegisterQueue(settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME, 100, 0, [], is_container_group=False).register()
RegisterQueue(
settings.DEFAULT_EXECUTION_QUEUE_NAME,
100,
0,
[],
is_container_group=True,
pod_spec_override=settings.DEFAULT_EXECUTION_QUEUE_POD_SPEC_OVERRIDE,
max_forks=settings.DEFAULT_EXECUTION_QUEUE_MAX_FORKS,
max_concurrent_jobs=settings.DEFAULT_EXECUTION_QUEUE_MAX_CONCURRENT_JOBS,
settings.DEFAULT_EXECUTION_QUEUE_NAME, 100, 0, [], is_container_group=True, pod_spec_override=settings.DEFAULT_EXECUTION_QUEUE_POD_SPEC_OVERRIDE
).register()
else:
(changed, instance) = Instance.objects.register(hostname=hostname, node_type=node_type, uuid=uuid)

View File

@@ -32,14 +32,8 @@ class Command(BaseCommand):
def handle(self, **options):
self.old_key = settings.SECRET_KEY
custom_key = os.environ.get("TOWER_SECRET_KEY")
if options.get("use_custom_key"):
if custom_key:
self.new_key = custom_key
else:
print("Use custom key was specified but the env var TOWER_SECRET_KEY was not available")
import sys
sys.exit(1)
if options.get("use_custom_key") and custom_key:
self.new_key = custom_key
else:
self.new_key = base64.encodebytes(os.urandom(33)).decode().rstrip()
self._notification_templates()

View File

@@ -17,9 +17,7 @@ class InstanceNotFound(Exception):
class RegisterQueue:
def __init__(
self, queuename, instance_percent, inst_min, hostname_list, is_container_group=None, pod_spec_override=None, max_forks=None, max_concurrent_jobs=None
):
def __init__(self, queuename, instance_percent, inst_min, hostname_list, is_container_group=None, pod_spec_override=None):
self.instance_not_found_err = None
self.queuename = queuename
self.instance_percent = instance_percent
@@ -27,8 +25,6 @@ class RegisterQueue:
self.hostname_list = hostname_list
self.is_container_group = is_container_group
self.pod_spec_override = pod_spec_override
self.max_forks = max_forks
self.max_concurrent_jobs = max_concurrent_jobs
def get_create_update_instance_group(self):
created = False
@@ -49,14 +45,6 @@ class RegisterQueue:
ig.pod_spec_override = self.pod_spec_override
changed = True
if self.max_forks and (ig.max_forks != self.max_forks):
ig.max_forks = self.max_forks
changed = True
if self.max_concurrent_jobs and (ig.max_concurrent_jobs != self.max_concurrent_jobs):
ig.max_concurrent_jobs = self.max_concurrent_jobs
changed = True
if changed:
ig.save()

View File

@@ -53,7 +53,7 @@ class Command(BaseCommand):
return lines
@classmethod
def get_connection_status(cls, hostnames, data):
def get_connection_status(cls, me, hostnames, data):
host_stats = [('hostname', 'state', 'start time', 'duration (sec)')]
for h in hostnames:
connection_color = '91' # red
@@ -78,7 +78,7 @@ class Command(BaseCommand):
return host_stats
@classmethod
def get_connection_stats(cls, hostnames, data):
def get_connection_stats(cls, me, hostnames, data):
host_stats = [('hostname', 'total', 'per minute')]
for h in hostnames:
h_safe = safe_name(h)
@@ -119,8 +119,8 @@ class Command(BaseCommand):
return
try:
my_hostname = Instance.objects.my_hostname()
logger.info('Active instance with hostname {} is registered.'.format(my_hostname))
me = Instance.objects.me()
logger.info('Active instance with hostname {} is registered.'.format(me.hostname))
except RuntimeError as e:
# the CLUSTER_HOST_ID in the task, and web instance must match and
# ensure network connectivity between the task and web instance
@@ -145,19 +145,19 @@ class Command(BaseCommand):
else:
data[family.name] = family.samples[0].value
my_hostname = Instance.objects.my_hostname()
hostnames = [i.hostname for i in Instance.objects.exclude(hostname=my_hostname)]
me = Instance.objects.me()
hostnames = [i.hostname for i in Instance.objects.exclude(hostname=me.hostname)]
host_stats = Command.get_connection_status(hostnames, data)
host_stats = Command.get_connection_status(me, hostnames, data)
lines = Command._format_lines(host_stats)
print(f'Broadcast websocket connection status from "{my_hostname}" to:')
print(f'Broadcast websocket connection status from "{me.hostname}" to:')
print('\n'.join(lines))
host_stats = Command.get_connection_stats(hostnames, data)
host_stats = Command.get_connection_stats(me, hostnames, data)
lines = Command._format_lines(host_stats)
print(f'\nBroadcast websocket connection stats from "{my_hostname}" to:')
print(f'\nBroadcast websocket connection stats from "{me.hostname}" to:')
print('\n'.join(lines))
return

View File

@@ -99,12 +99,9 @@ class InstanceManager(models.Manager):
instance or role.
"""
def my_hostname(self):
return settings.CLUSTER_HOST_ID
def me(self):
"""Return the currently active instance."""
node = self.filter(hostname=self.my_hostname())
node = self.filter(hostname=settings.CLUSTER_HOST_ID)
if node.exists():
return node[0]
raise RuntimeError("No instance found with the current cluster host id")
@@ -158,11 +155,7 @@ class InstanceManager(models.Manager):
return (False, instance)
# Create new instance, and fill in default values
create_defaults = {
'node_state': Instance.States.INSTALLED,
'capacity': 0,
'listener_port': 27199,
}
create_defaults = {'node_state': Instance.States.INSTALLED, 'capacity': 0}
if defaults is not None:
create_defaults.update(defaults)
uuid_option = {}

View File

@@ -1,14 +1,24 @@
# Generated by Django 3.2.13 on 2022-06-21 21:29
from django.db import migrations
import logging
logger = logging.getLogger("awx")
def forwards(apps, schema_editor):
InventorySource = apps.get_model('main', 'InventorySource')
InventorySource.objects.filter(update_on_project_update=True).update(update_on_launch=True)
Project = apps.get_model('main', 'Project')
Project.objects.filter(scm_inventory_sources__update_on_project_update=True).update(scm_update_on_launch=True)
sources = InventorySource.objects.filter(update_on_project_update=True)
for src in sources:
if src.update_on_launch == False:
src.update_on_launch = True
src.save(update_fields=['update_on_launch'])
logger.info(f"Setting update_on_launch to True for {src}")
proj = src.source_project
if proj and proj.scm_update_on_launch is False:
proj.scm_update_on_launch = True
proj.save(update_fields=['scm_update_on_launch'])
logger.warning(f"Setting scm_update_on_launch to True for {proj}")
class Migration(migrations.Migration):

View File

@@ -1,23 +0,0 @@
# Generated by Django 3.2.13 on 2022-10-24 18:22
from django.db import migrations, models
class Migration(migrations.Migration):
    """Schema migration that adds two integer limit columns to ``instancegroup``."""

    # Applied after 0172_prevent_instance_fallback in the 'main' app.
    dependencies = [
        ('main', '0172_prevent_instance_fallback'),
    ]

    operations = [
        # Per-group cap on concurrently running jobs; defaults to 0, which the help text defines as "no limit".
        migrations.AddField(
            model_name='instancegroup',
            name='max_concurrent_jobs',
            field=models.IntegerField(default=0, help_text='Maximum number of concurrent jobs to run on this group. Zero means no limit.'),
        ),
        # Per-group cap on forks; defaults to 0, which the help text defines as "no limit".
        migrations.AddField(
            model_name='instancegroup',
            name='max_forks',
            field=models.IntegerField(default=0, help_text='Max forks to execute on this group. Zero means no limit.'),
        ),
    ]

View File

@@ -1,18 +0,0 @@
# Generated by Django 3.2.16 on 2022-12-07 21:11
from django.db import migrations
from awx.main.migrations import _rbac as rbac
from awx.main.migrations import _migration_utils as migration_utils
class Migration(migrations.Migration):
    """Data-only migration: it contains no schema operations, only two RunPython steps."""

    # Applied after 0173_instancegroup_max_limits in the 'main' app.
    dependencies = [
        ('main', '0173_instancegroup_max_limits'),
    ]

    # Neither RunPython call passes reverse_code.
    operations = [
        # NOTE(review): presumably registers this migration's app state with the shared migration helpers - confirm in _migration_utils
        migrations.RunPython(migration_utils.set_current_apps_for_migrations),
        # NOTE(review): presumably (re)creates the implicit RBAC roles for existing objects - confirm in _rbac
        migrations.RunPython(rbac.create_roles),
    ]

View File

@@ -4,7 +4,7 @@ from django.utils.timezone import now
logger = logging.getLogger('awx.main.migrations')
__all__ = ['create_clearsessions_jt', 'create_cleartokens_jt']
__all__ = ['create_collection_jt', 'create_clearsessions_jt', 'create_cleartokens_jt']
'''
These methods are called by migrations to create various system job templates

View File

@@ -44,7 +44,7 @@ def migrate_galaxy_settings(apps, schema_editor):
credential_type=galaxy_type,
inputs={'url': 'https://galaxy.ansible.com/'},
)
except Exception:
except:
# Needed for new migrations, tests
public_galaxy_credential = Credential(
created=now(), modified=now(), name='Ansible Galaxy', managed=True, credential_type=galaxy_type, inputs={'url': 'https://galaxy.ansible.com/'}

View File

@@ -282,7 +282,7 @@ class Credential(PasswordFieldsModel, CommonModelNameNotUnique, ResourceMixin):
return field['default']
if 'default' in kwargs:
return kwargs['default']
raise AttributeError(field_name)
raise AttributeError
if field_name in self.inputs:
return self.inputs[field_name]
if 'default' in kwargs:

View File

@@ -15,7 +15,6 @@ def aws(cred, env, private_data_dir):
if cred.has_input('security_token'):
env['AWS_SECURITY_TOKEN'] = cred.get_input('security_token', default='')
env['AWS_SESSION_TOKEN'] = env['AWS_SECURITY_TOKEN']
def gce(cred, env, private_data_dir):

View File

@@ -233,12 +233,11 @@ class Instance(HasPolicyEditsMixin, BaseModel):
if not isinstance(vargs.get('grace_period'), int):
vargs['grace_period'] = 60 # grace period of 60 minutes, need to set because CLI default will not take effect
if 'exclude_strings' not in vargs and vargs.get('file_pattern'):
active_job_qs = UnifiedJob.objects.filter(status__in=('running', 'waiting'))
if self.node_type == 'execution':
active_job_qs = active_job_qs.filter(execution_node=self.hostname)
else:
active_job_qs = active_job_qs.filter(controller_node=self.hostname)
active_pks = list(active_job_qs.values_list('pk', flat=True))
active_pks = list(
UnifiedJob.objects.filter(
(models.Q(execution_node=self.hostname) | models.Q(controller_node=self.hostname)) & models.Q(status__in=('running', 'waiting'))
).values_list('pk', flat=True)
)
if active_pks:
vargs['exclude_strings'] = [JOB_FOLDER_PREFIX % job_id for job_id in active_pks]
if 'remove_images' in vargs or 'image_prune' in vargs:
@@ -379,8 +378,6 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
default='',
)
)
max_concurrent_jobs = models.IntegerField(default=0, help_text=_("Maximum number of concurrent jobs to run on this group. Zero means no limit."))
max_forks = models.IntegerField(default=0, help_text=_("Max forks to execute on this group. Zero means no limit."))
policy_instance_percentage = models.IntegerField(default=0, help_text=_("Percentage of Instances to automatically assign to this group"))
policy_instance_minimum = models.IntegerField(default=0, help_text=_("Static minimum number of Instances to automatically assign to this group"))
policy_instance_list = JSONBlob(
@@ -394,8 +391,6 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
@property
def capacity(self):
if self.is_container_group:
return self.max_forks
return sum(inst.capacity for inst in self.instances.all())
@property

View File

@@ -247,19 +247,6 @@ class Inventory(CommonModelNameNotUnique, ResourceMixin, RelatedJobsMixin):
return (number, step)
def get_sliced_hosts(self, host_queryset, slice_number, slice_count):
"""
Returns a slice of Hosts given a slice number and total slice count, or
the original queryset if slicing is not requested.
NOTE: If slicing is performed, this will return a List[Host] with the
resulting slice. If slicing is not performed it will return the
original queryset (not evaluating it or forcing it to a list). This
puts the burden on the caller to check the resulting type. This is
non-ideal because it's easy to get wrong, but I think the only way
around it is to force the queryset which has memory implications for
large inventories.
"""
if slice_count > 1 and slice_number > 0:
offset = slice_number - 1
host_queryset = host_queryset[offset::slice_count]
@@ -567,6 +554,17 @@ class Host(CommonModelNameNotUnique, RelatedJobsMixin):
# Use .job_host_summaries.all() to get jobs affecting this host.
# Use .job_events.all() to get events affecting this host.
'''
We don't use timestamp, but we may in the future.
'''
def update_ansible_facts(self, module, facts, timestamp=None):
    """Store ``facts`` on this host and save it.

    Facts reported under the ``"ansible"`` module are merged into the top
    level of ``self.ansible_facts``; facts from any other module are stored
    under a key named after that module, replacing any previous value.

    ``timestamp`` is accepted but not used by this method.
    """
    if module != "ansible":
        self.ansible_facts[module] = facts
    else:
        self.ansible_facts.update(facts)
    self.save()
def get_effective_host_name(self):
"""
Return the name of the host that will be used in actual ansible

View File

@@ -15,7 +15,6 @@ from urllib.parse import urljoin
from django.conf import settings
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models.query import QuerySet
# from django.core.cache import cache
from django.utils.encoding import smart_str
@@ -44,7 +43,7 @@ from awx.main.models.notifications import (
NotificationTemplate,
JobNotificationMixin,
)
from awx.main.utils import parse_yaml_or_json, getattr_dne, NullablePromptPseudoField, polymorphic, log_excess_runtime
from awx.main.utils import parse_yaml_or_json, getattr_dne, NullablePromptPseudoField, polymorphic
from awx.main.fields import ImplicitRoleField, AskForField, JSONBlob, OrderedManyToManyField
from awx.main.models.mixins import (
ResourceMixin,
@@ -845,35 +844,22 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
def get_notification_friendly_name(self):
return "Job"
def _get_inventory_hosts(self, only=('name', 'ansible_facts', 'ansible_facts_modified', 'modified', 'inventory_id'), **filters):
"""Return value is an iterable for the relevant hosts for this job"""
def _get_inventory_hosts(self, only=['name', 'ansible_facts', 'ansible_facts_modified', 'modified', 'inventory_id']):
if not self.inventory:
return []
host_queryset = self.inventory.hosts.only(*only)
if filters:
host_queryset = host_queryset.filter(**filters)
host_queryset = self.inventory.get_sliced_hosts(host_queryset, self.job_slice_number, self.job_slice_count)
if isinstance(host_queryset, QuerySet):
return host_queryset.iterator()
return host_queryset
return self.inventory.get_sliced_hosts(host_queryset, self.job_slice_number, self.job_slice_count)
@log_excess_runtime(logger, debug_cutoff=0.01, msg='Job {job_id} host facts prepared for {written_ct} hosts, took {delta:.3f} s', add_log_data=True)
def start_job_fact_cache(self, destination, log_data, timeout=None):
def start_job_fact_cache(self, destination, modification_times, timeout=None):
self.log_lifecycle("start_job_fact_cache")
log_data['job_id'] = self.id
log_data['written_ct'] = 0
os.makedirs(destination, mode=0o700)
hosts = self._get_inventory_hosts()
if timeout is None:
timeout = settings.ANSIBLE_FACT_CACHE_TIMEOUT
if timeout > 0:
# exclude hosts with fact data older than `settings.ANSIBLE_FACT_CACHE_TIMEOUT seconds`
timeout = now() - datetime.timedelta(seconds=timeout)
hosts = self._get_inventory_hosts(ansible_facts_modified__gte=timeout)
else:
hosts = self._get_inventory_hosts()
last_filepath_written = None
hosts = hosts.filter(ansible_facts_modified__gte=timeout)
for host in hosts:
filepath = os.sep.join(map(str, [destination, host.name]))
if not os.path.realpath(filepath).startswith(destination):
@@ -883,38 +869,23 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
with codecs.open(filepath, 'w', encoding='utf-8') as f:
os.chmod(f.name, 0o600)
json.dump(host.ansible_facts, f)
log_data['written_ct'] += 1
last_filepath_written = filepath
except IOError:
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
continue
# make note of the time we wrote the last file so we can check if any file changed later
if last_filepath_written:
return os.path.getmtime(last_filepath_written)
return None
# make note of the time we wrote the file so we can check if it changed later
modification_times[filepath] = os.path.getmtime(filepath)
@log_excess_runtime(
logger,
debug_cutoff=0.01,
msg='Job {job_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s',
add_log_data=True,
)
def finish_job_fact_cache(self, destination, facts_write_time, log_data):
def finish_job_fact_cache(self, destination, modification_times):
self.log_lifecycle("finish_job_fact_cache")
log_data['job_id'] = self.id
log_data['updated_ct'] = 0
log_data['unmodified_ct'] = 0
log_data['cleared_ct'] = 0
hosts_to_update = []
for host in self._get_inventory_hosts():
filepath = os.sep.join(map(str, [destination, host.name]))
if not os.path.realpath(filepath).startswith(destination):
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
continue
if os.path.exists(filepath):
# If the file changed since we wrote the last facts file, pre-playbook run...
# If the file changed since we wrote it pre-playbook run...
modified = os.path.getmtime(filepath)
if (not facts_write_time) or modified > facts_write_time:
if modified > modification_times.get(filepath, 0):
with codecs.open(filepath, 'r', encoding='utf-8') as f:
try:
ansible_facts = json.load(f)
@@ -922,7 +893,7 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
continue
host.ansible_facts = ansible_facts
host.ansible_facts_modified = now()
hosts_to_update.append(host)
host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
system_tracking_logger.info(
'New fact for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)),
extra=dict(
@@ -933,21 +904,12 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
job_id=self.id,
),
)
log_data['updated_ct'] += 1
else:
log_data['unmodified_ct'] += 1
else:
# if the file goes missing, ansible removed it (likely via clear_facts)
host.ansible_facts = {}
host.ansible_facts_modified = now()
hosts_to_update.append(host)
system_tracking_logger.info('Facts cleared for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)))
log_data['cleared_ct'] += 1
if len(hosts_to_update) > 100:
self.inventory.hosts.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
hosts_to_update = []
if hosts_to_update:
self.inventory.hosts.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
host.save()
class LaunchTimeConfigBase(BaseModel):

View File

@@ -471,29 +471,6 @@ class Project(UnifiedJobTemplate, ProjectOptions, ResourceMixin, CustomVirtualEn
def get_absolute_url(self, request=None):
return reverse('api:project_detail', kwargs={'pk': self.pk}, request=request)
def get_reason_if_failed(self):
    """
    If the project is in a failed or errored state, return a human-readable
    error message explaining why. Otherwise return None.

    The message distinguishes a signature/checksum verification failure in
    the most recent project update from any other failed-update condition.

    This is used during validation in the serializer and also by
    RunProjectUpdate/RunInventoryUpdate.
    """
    if self.status not in ('error', 'failed'):
        return None

    latest_update = self.project_updates.last()
    if latest_update is not None and latest_update.failed:
        failed_validation_tasks = latest_update.project_update_events.filter(
            event='runner_on_failed',
            play="Perform project signature/checksum verification",
        )
        # exists() only asks the database whether at least one row matches,
        # instead of loading every matching event just to test for emptiness.
        if failed_validation_tasks.exists():
            return _("Last project update failed due to signature validation failure.")

    return _("Missing a revision to run due to failed project update.")
'''
RelatedJobsMixin
'''

View File

@@ -153,7 +153,7 @@ class Schedule(PrimordialModel, LaunchTimeConfig):
#
# Find the DTSTART rule or raise an error; it's usually the first rule, but that is not strictly enforced
start_date_rule = re.sub(r'^.*(DTSTART[^\s]+)\s.*$', r'\1', rrule)
start_date_rule = re.sub('^.*(DTSTART[^\s]+)\s.*$', r'\1', rrule)
if not start_date_rule:
raise ValueError('A DTSTART field needs to be in the rrule')

View File

@@ -1305,8 +1305,6 @@ class UnifiedJob(
status_data['instance_group_name'] = None
elif status in ['successful', 'failed', 'canceled'] and self.finished:
status_data['finished'] = datetime.datetime.strftime(self.finished, "%Y-%m-%dT%H:%M:%S.%fZ")
elif status == 'running':
status_data['started'] = datetime.datetime.strftime(self.finished, "%Y-%m-%dT%H:%M:%S.%fZ")
status_data.update(self.websocket_emit_data())
status_data['group_name'] = 'jobs'
if getattr(self, 'unified_job_template_id', None):
@@ -1351,12 +1349,12 @@ class UnifiedJob(
if required in defined_fields and not credential.has_input(required):
missing_credential_inputs.append(required)
if missing_credential_inputs:
self.job_explanation = '{} cannot start because Credential {} does not provide one or more required fields ({}).'.format(
self._meta.verbose_name.title(), credential.name, ', '.join(sorted(missing_credential_inputs))
)
self.save(update_fields=['job_explanation'])
return (False, None)
if missing_credential_inputs:
self.job_explanation = '{} cannot start because Credential {} does not provide one or more required fields ({}).'.format(
self._meta.verbose_name.title(), credential.name, ', '.join(sorted(missing_credential_inputs))
)
self.save(update_fields=['job_explanation'])
return (False, None)
needed = self.get_passwords_needed_to_start()
try:
@@ -1467,23 +1465,23 @@ class UnifiedJob(
self.job_explanation = job_explanation
cancel_fields.append('job_explanation')
# Important to save here before sending cancel signal to dispatcher to cancel because
# the job control process will use the cancel_flag to distinguish a shutdown from a cancel
self.save(update_fields=cancel_fields)
controller_notified = False
if self.celery_task_id:
controller_notified = self.cancel_dispatcher_process()
else:
# Avoid race condition where we have stale model from pending state but job has already started,
# its checking signal but not cancel_flag, so re-send signal after this database commit
connection.on_commit(self.fallback_cancel)
# If a SIGTERM signal was sent to the control process, and acked by the dispatcher
# then we want to let its own cleanup change status, otherwise change status now
if not controller_notified:
if self.status != 'canceled':
self.status = 'canceled'
self.save(update_fields=['status'])
# Avoid race condition where we have stale model from pending state but job has already started,
# its checking signal but not cancel_flag, so re-send signal after updating cancel fields
self.fallback_cancel()
cancel_fields.append('status')
self.save(update_fields=cancel_fields)
return self.cancel_flag

View File

@@ -5,6 +5,9 @@ import json
import logging
import requests
from django.utils.encoding import smart_str
from django.utils.translation import gettext_lazy as _
from awx.main.notifications.base import AWXBaseEmailBackend
from awx.main.utils import get_awx_http_client_headers
from awx.main.notifications.custom_notification_base import CustomNotificationBase
@@ -14,8 +17,6 @@ logger = logging.getLogger('awx.main.notifications.webhook_backend')
class WebhookBackend(AWXBaseEmailBackend, CustomNotificationBase):
MAX_RETRIES = 5
init_parameters = {
"url": {"label": "Target URL", "type": "string"},
"http_method": {"label": "HTTP Method", "type": "string", "default": "POST"},
@@ -63,67 +64,20 @@ class WebhookBackend(AWXBaseEmailBackend, CustomNotificationBase):
if self.http_method.lower() not in ['put', 'post']:
raise ValueError("HTTP method must be either 'POST' or 'PUT'.")
chosen_method = getattr(requests, self.http_method.lower(), None)
for m in messages:
auth = None
if self.username or self.password:
auth = (self.username, self.password)
# the constructor for EmailMessage - https://docs.djangoproject.com/en/4.1/_modules/django/core/mail/message will turn an empty dictionary to an empty string
# sometimes an empty dict is intentional and we added this conditional to enforce that
if not m.body:
m.body = {}
url = str(m.recipients()[0])
data = json.dumps(m.body, ensure_ascii=False).encode('utf-8')
headers = {**(get_awx_http_client_headers()), **(self.headers or {})}
err = None
for retries in range(self.MAX_RETRIES):
# Sometimes we hit redirect URLs. We must account for this. We still extract the redirect URL from the response headers and try again. Max retries == 5
resp = chosen_method(
url=url,
auth=auth,
data=data,
headers=headers,
verify=(not self.disable_ssl_verification),
allow_redirects=False, # override default behaviour for redirects
)
# either success or error reached if this conditional fires
if resp.status_code not in [301, 307]:
break
# we've hit a redirect. extract the redirect URL out of the first response header and try again
logger.warning(
f"Received a {resp.status_code} from {url}, trying to reach redirect url {resp.headers.get('Location', None)}; attempt #{retries+1}"
)
# take the first redirect URL in the response header and try that
url = resp.headers.get("Location", None)
if url is None:
err = f"Webhook notification received redirect to a blank URL from {url}. Response headers={resp.headers}"
break
else:
# no break condition in the loop encountered; therefore we have hit the maximum number of retries
err = f"Webhook notification max number of retries [{self.MAX_RETRIES}] exceeded. Failed to send webhook notification to {url}"
if resp.status_code >= 400:
err = f"Error sending webhook notification: {resp.status_code}"
# log error message
if err:
logger.error(err)
r = chosen_method(
"{}".format(m.recipients()[0]),
auth=auth,
data=json.dumps(m.body, ensure_ascii=False).encode('utf-8'),
headers=dict(list(get_awx_http_client_headers().items()) + list((self.headers or {}).items())),
verify=(not self.disable_ssl_verification),
)
if r.status_code >= 400:
logger.error(smart_str(_("Error sending notification webhook: {}").format(r.status_code)))
if not self.fail_silently:
raise Exception(err)
# no errors were encountered therefore we successfully sent off the notification webhook
if resp.status_code in range(200, 299):
logger.debug(f"Notification webhook successfully sent to {url}. Received {resp.status_code}")
sent_messages += 1
raise Exception(smart_str(_("Error sending notification webhook: {}").format(r.status_code)))
sent_messages += 1
return sent_messages

View File

@@ -3,8 +3,6 @@
from django.db.models.signals import pre_save, post_save, pre_delete, m2m_changed
from taggit.managers import TaggableManager
class ActivityStreamRegistrar(object):
def __init__(self):
@@ -21,8 +19,6 @@ class ActivityStreamRegistrar(object):
pre_delete.connect(activity_stream_delete, sender=model, dispatch_uid=str(self.__class__) + str(model) + "_delete")
for m2mfield in model._meta.many_to_many:
if isinstance(m2mfield, TaggableManager):
continue # Special case for taggit app
try:
m2m_attr = getattr(model, m2mfield.name)
m2m_changed.connect(

View File

@@ -27,8 +27,8 @@ class AWXProtocolTypeRouter(ProtocolTypeRouter):
websocket_urlpatterns = [
re_path(r'websocket/$', consumers.EventConsumer.as_asgi()),
re_path(r'websocket/broadcast/$', consumers.BroadcastConsumer.as_asgi()),
re_path(r'websocket/$', consumers.EventConsumer),
re_path(r'websocket/broadcast/$', consumers.BroadcastConsumer),
]
application = AWXProtocolTypeRouter(

View File

@@ -39,11 +39,12 @@ from awx.main.utils import (
ScheduleTaskManager,
ScheduleWorkflowManager,
)
from awx.main.utils.common import task_manager_bulk_reschedule, is_testing
from awx.main.utils.common import task_manager_bulk_reschedule
from awx.main.signals import disable_activity_stream
from awx.main.constants import ACTIVE_STATES
from awx.main.scheduler.dependency_graph import DependencyGraph
from awx.main.scheduler.task_manager_models import TaskManagerModels
from awx.main.scheduler.task_manager_models import TaskManagerInstances
from awx.main.scheduler.task_manager_models import TaskManagerInstanceGroups
import awx.main.analytics.subsystem_metrics as s_metrics
from awx.main.utils import decrypt_field
@@ -70,12 +71,7 @@ class TaskBase:
# is called later.
self.subsystem_metrics = s_metrics.Metrics(auto_pipe_execute=False)
self.start_time = time.time()
# We want to avoid calling settings in loops, so cache these settings at init time
self.start_task_limit = settings.START_TASK_LIMIT
self.task_manager_timeout = settings.TASK_MANAGER_TIMEOUT
self.control_task_impact = settings.AWX_CONTROL_NODE_TASK_IMPACT
for m in self.subsystem_metrics.METRICS:
if m.startswith(self.prefix):
self.subsystem_metrics.set(m, 0)
@@ -83,7 +79,7 @@ class TaskBase:
def timed_out(self):
"""Return True/False if we have met or exceeded the timeout for the task manager."""
elapsed = time.time() - self.start_time
if elapsed >= self.task_manager_timeout:
if elapsed >= settings.TASK_MANAGER_TIMEOUT:
logger.warning(f"{self.prefix} manager has run for {elapsed} which is greater than TASK_MANAGER_TIMEOUT of {settings.TASK_MANAGER_TIMEOUT}.")
return True
return False
@@ -101,7 +97,7 @@ class TaskBase:
self.all_tasks = [t for t in qs]
def record_aggregate_metrics(self, *args):
if not is_testing():
if not settings.IS_TESTING():
# increment task_manager_schedule_calls regardless if the other
# metrics are recorded
s_metrics.Metrics(auto_pipe_execute=True).inc(f"{self.prefix}__schedule_calls", 1)
@@ -475,8 +471,9 @@ class TaskManager(TaskBase):
Init AFTER we know this instance of the task manager will run because the lock is acquired.
"""
self.dependency_graph = DependencyGraph()
self.tm_models = TaskManagerModels()
self.controlplane_ig = self.tm_models.instance_groups.controlplane_ig
self.instances = TaskManagerInstances(self.all_tasks)
self.instance_groups = TaskManagerInstanceGroups(instances_by_hostname=self.instances)
self.controlplane_ig = self.instance_groups.controlplane_ig
def job_blocked_by(self, task):
# TODO: I'm not happy with this, I think blocking behavior should be decided outside of the dependency graph
@@ -507,16 +504,8 @@ class TaskManager(TaskBase):
return None
@timeit
def start_task(self, task, instance_group, instance=None):
# Just like for process_running_tasks, add the job to the dependency graph and
# ask the TaskManagerInstanceGroups object to update consumed capacity on all
# implicated instances and container groups.
def start_task(self, task, instance_group, dependent_tasks=None, instance=None):
self.dependency_graph.add_job(task)
if instance_group is not None:
task.instance_group = instance_group
# We need the instance group assigned to correctly account for container group max_concurrent_jobs and max_forks
self.tm_models.consume_capacity(task)
self.subsystem_metrics.inc(f"{self.prefix}_tasks_started", 1)
self.start_task_limit -= 1
if self.start_task_limit == 0:
@@ -524,6 +513,20 @@ class TaskManager(TaskBase):
ScheduleTaskManager().schedule()
from awx.main.tasks.system import handle_work_error, handle_work_success
# update capacity for control node and execution node
if task.controller_node:
self.instances[task.controller_node].consume_capacity(settings.AWX_CONTROL_NODE_TASK_IMPACT)
if task.execution_node:
self.instances[task.execution_node].consume_capacity(task.task_impact)
dependent_tasks = dependent_tasks or []
task_actual = {
'type': get_type_for_model(type(task)),
'id': task.id,
}
dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]
task.status = 'waiting'
(start_status, opts) = task.pre_start()
@@ -543,6 +546,7 @@ class TaskManager(TaskBase):
ScheduleWorkflowManager().schedule()
# at this point we already have control/execution nodes selected for the following cases
else:
task.instance_group = instance_group
execution_node_msg = f' and execution node {task.execution_node}' if task.execution_node else ''
logger.debug(
f'Submitting job {task.log_format} controlled by {task.controller_node} to instance group {instance_group.name}{execution_node_msg}.'
@@ -555,7 +559,6 @@ class TaskManager(TaskBase):
# apply_async does a NOTIFY to the channel dispatcher is listening to
# postgres will treat this as part of the transaction, which is what we want
if task.status != 'failed' and type(task) is not WorkflowJob:
task_actual = {'type': get_type_for_model(type(task)), 'id': task.id}
task_cls = task._get_task_class()
task_cls.apply_async(
[task.pk],
@@ -563,7 +566,7 @@ class TaskManager(TaskBase):
queue=task.get_queue_name(),
uuid=task.celery_task_id,
callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
errbacks=[{'task': handle_work_error.name, 'kwargs': {'task_actual': task_actual}}],
errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
)
# In exception cases, like a job failing pre-start checks, we send the websocket status message
@@ -577,7 +580,6 @@ class TaskManager(TaskBase):
if type(task) is WorkflowJob:
ScheduleWorkflowManager().schedule()
self.dependency_graph.add_job(task)
self.tm_models.consume_capacity(task)
@timeit
def process_pending_tasks(self, pending_tasks):
@@ -602,18 +604,18 @@ class TaskManager(TaskBase):
if isinstance(task, WorkflowJob):
# Previously we were tracking allow_simultaneous blocking both here and in DependencyGraph.
# Double check that using just the DependencyGraph works for Workflows and Sliced Jobs.
self.start_task(task, None, None)
self.start_task(task, None, task.get_jobs_fail_chain(), None)
continue
found_acceptable_queue = False
# Determine if there is control capacity for the task
if task.capacity_type == 'control':
control_impact = task.task_impact + self.control_task_impact
control_impact = task.task_impact + settings.AWX_CONTROL_NODE_TASK_IMPACT
else:
control_impact = self.control_task_impact
control_instance = self.tm_models.instance_groups.fit_task_to_most_remaining_capacity_instance(
task, instance_group_name=self.controlplane_ig.name, impact=control_impact, capacity_type='control'
control_impact = settings.AWX_CONTROL_NODE_TASK_IMPACT
control_instance = self.instance_groups.fit_task_to_most_remaining_capacity_instance(
task, instance_group_name=settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME, impact=control_impact, capacity_type='control'
)
if not control_instance:
self.task_needs_capacity(task, tasks_to_update_job_explanation)
@@ -624,29 +626,25 @@ class TaskManager(TaskBase):
# All task.capacity_type == 'control' jobs should run on control plane, no need to loop over instance groups
if task.capacity_type == 'control':
if not self.tm_models.instance_groups[self.controlplane_ig.name].has_remaining_capacity(control_impact=True):
continue
task.execution_node = control_instance.hostname
execution_instance = self.tm_models.instances[control_instance.hostname].obj
execution_instance = self.instances[control_instance.hostname].obj
task.log_lifecycle("controller_node_chosen")
task.log_lifecycle("execution_node_chosen")
self.start_task(task, self.controlplane_ig, execution_instance)
self.start_task(task, self.controlplane_ig, task.get_jobs_fail_chain(), execution_instance)
found_acceptable_queue = True
continue
for instance_group in self.tm_models.instance_groups.get_instance_groups_from_task_cache(task):
if not self.tm_models.instance_groups[instance_group.name].has_remaining_capacity(task):
continue
for instance_group in self.instance_groups.get_instance_groups_from_task_cache(task):
if instance_group.is_container_group:
self.start_task(task, instance_group, None)
self.start_task(task, instance_group, task.get_jobs_fail_chain(), None)
found_acceptable_queue = True
break
# at this point we know the instance group is NOT a container group
# because if it was, it would have started the task and broke out of the loop.
execution_instance = self.tm_models.instance_groups.fit_task_to_most_remaining_capacity_instance(
execution_instance = self.instance_groups.fit_task_to_most_remaining_capacity_instance(
task, instance_group_name=instance_group.name, add_hybrid_control_cost=True
) or self.tm_models.instance_groups.find_largest_idle_instance(instance_group_name=instance_group.name, capacity_type=task.capacity_type)
) or self.instance_groups.find_largest_idle_instance(instance_group_name=instance_group.name, capacity_type=task.capacity_type)
if execution_instance:
task.execution_node = execution_instance.hostname
@@ -662,8 +660,8 @@ class TaskManager(TaskBase):
task.log_format, instance_group.name, execution_instance.hostname, execution_instance.remaining_capacity
)
)
execution_instance = self.tm_models.instances[execution_instance.hostname].obj
self.start_task(task, instance_group, execution_instance)
execution_instance = self.instances[execution_instance.hostname].obj
self.start_task(task, instance_group, task.get_jobs_fail_chain(), execution_instance)
found_acceptable_queue = True
break
else:

View File

@@ -15,18 +15,15 @@ logger = logging.getLogger('awx.main.scheduler')
class TaskManagerInstance:
"""A class representing minimal data the task manager needs to represent an Instance."""
def __init__(self, obj, **kwargs):
def __init__(self, obj):
self.obj = obj
self.node_type = obj.node_type
self.consumed_capacity = 0
self.capacity = obj.capacity
self.hostname = obj.hostname
self.jobs_running = 0
def consume_capacity(self, impact, job_impact=False):
def consume_capacity(self, impact):
self.consumed_capacity += impact
if job_impact:
self.jobs_running += 1
@property
def remaining_capacity(self):
@@ -36,106 +33,9 @@ class TaskManagerInstance:
return remaining
class TaskManagerInstanceGroup:
    """A class representing minimal data the task manager needs to represent an InstanceGroup."""

    def __init__(self, obj, task_manager_instances=None, **kwargs):
        """Snapshot the fields of instance group ``obj`` that capacity checks need.

        Args:
            obj: the instance group; ``name``, ``is_container_group``,
                ``max_concurrent_jobs``, ``max_forks`` and ``instances.all()`` are read.
            task_manager_instances: hostname -> TaskManagerInstance lookup supporting
                ``in`` and ``[]``. ``None`` is treated as "no known instances".
            **kwargs: ``control_task_impact`` overrides the
                ``settings.AWX_CONTROL_NODE_TASK_IMPACT`` default.
        """
        self.name = obj.name
        self.is_container_group = obj.is_container_group
        self.container_group_jobs = 0
        self.container_group_consumed_forks = 0
        known_instances = task_manager_instances if task_manager_instances is not None else {}
        # We want the list of TaskManagerInstance objects because these are shared across the TaskManagerInstanceGroup objects.
        # This way when we consume capacity on an instance that is in multiple groups, we tabulate across all the groups correctly.
        member_hostnames = [instance.hostname for instance in obj.instances.all() if instance.hostname in known_instances]
        self.instances = [known_instances[hostname] for hostname in member_hostnames]
        self.instance_hostnames = tuple(member_hostnames)
        self.max_concurrent_jobs = obj.max_concurrent_jobs
        self.max_forks = obj.max_forks
        # dict.get() evaluates its default eagerly, so only touch settings
        # when the caller did not hand us the value.
        if 'control_task_impact' in kwargs:
            self.control_task_impact = kwargs['control_task_impact']
        else:
            self.control_task_impact = settings.AWX_CONTROL_NODE_TASK_IMPACT

    def consume_capacity(self, task):
        """We only consume capacity on an instance group level if it is a container group. Otherwise we consume capacity on an instance level.

        Raises:
            RuntimeError: if this group is not a container group.
        """
        if not self.is_container_group:
            raise RuntimeError("We only track capacity for container groups at the instance group level. Otherwise, consume capacity on instances.")
        self.container_group_jobs += 1
        self.container_group_consumed_forks += task.task_impact

    def get_remaining_instance_capacity(self):
        """Return the summed ``remaining_capacity`` of the member instances."""
        return sum(inst.remaining_capacity for inst in self.instances)

    def get_instance_capacity(self):
        """Return the summed ``capacity`` of the member instances."""
        return sum(inst.capacity for inst in self.instances)

    def get_consumed_instance_capacity(self):
        """Return the summed ``consumed_capacity`` of the member instances."""
        return sum(inst.consumed_capacity for inst in self.instances)

    def get_instance_jobs_running(self):
        """Return the summed ``jobs_running`` of the member instances."""
        return sum(inst.jobs_running for inst in self.instances)

    def get_jobs_running(self):
        """Return the jobs counted against this group (group-level counter for container groups, instance sum otherwise)."""
        if self.is_container_group:
            return self.container_group_jobs
        return self.get_instance_jobs_running()

    def get_capacity(self):
        """This reports any type of capacity, including that of container group jobs.

        Container groups don't really have capacity, but if they have max_forks set,
        we can interpret that as how much capacity the user has defined them to have.
        """
        if self.is_container_group:
            return self.max_forks
        return self.get_instance_capacity()

    def get_consumed_capacity(self):
        """Return consumed forks for a container group, otherwise the consumed capacity of the member instances."""
        if self.is_container_group:
            return self.container_group_consumed_forks
        return self.get_consumed_instance_capacity()

    def get_remaining_capacity(self):
        """Return ``get_capacity()`` minus ``get_consumed_capacity()``."""
        return self.get_capacity() - self.get_consumed_capacity()

    def has_remaining_capacity(self, task=None, control_impact=False):
        """Pass either a task or control_impact=True to determine if the IG has capacity to run the control task or job task.

        Returns:
            False when starting the work would exceed ``max_concurrent_jobs`` or
            ``max_forks``; True otherwise. A limit of 0 is not enforced.
        """
        task_impact = self.control_task_impact if control_impact else task.task_impact
        # A control task does not count as an additional job against max_concurrent_jobs
        job_impact = 0 if control_impact else 1
        task_string = f"task {task.log_format} with impact of {task_impact}" if task else f"control task with impact of {task_impact}"

        if self.max_concurrent_jobs == 0:
            # max_concurrent_jobs == 0 means no maximum is enforced, so never report a deficit
            remaining_jobs = 0
        else:
            remaining_jobs = self.max_concurrent_jobs - self.get_jobs_running() - job_impact

        if self.max_forks == 0:
            # max_forks == 0 means no maximum is enforced, so never report a deficit
            remaining_forks = 0
        else:
            remaining_forks = self.max_forks - self.get_consumed_capacity() - task_impact

        if remaining_jobs < 0 or remaining_forks < 0:
            # A value less than zero means the task will not fit on the group
            if remaining_jobs < 0:
                logger.debug(f"{task_string} cannot fit on instance group {self.name} with {remaining_jobs} remaining jobs")
            if remaining_forks < 0:
                logger.debug(f"{task_string} cannot fit on instance group {self.name} with {remaining_forks} remaining forks")
            return False

        # Returning true means there is enough remaining capacity on the group to run the task (or no instance group level limits are being set)
        logger.debug(f"{task_string} can fit on instance group {self.name} with {remaining_forks} remaining forks and {remaining_jobs} remaining jobs")
        return True
class TaskManagerInstances:
def __init__(self, instances=None, instance_fields=('node_type', 'capacity', 'hostname', 'enabled'), **kwargs):
def __init__(self, active_tasks, instances=None, instance_fields=('node_type', 'capacity', 'hostname', 'enabled')):
self.instances_by_hostname = dict()
self.instance_groups_container_group_jobs = dict()
self.instance_groups_container_group_consumed_forks = dict()
self.control_task_impact = kwargs.get('control_task_impact', settings.AWX_CONTROL_NODE_TASK_IMPACT)
if instances is None:
instances = (
Instance.objects.filter(hostname__isnull=False, node_state=Instance.States.READY, enabled=True)
@@ -143,15 +43,18 @@ class TaskManagerInstances:
.only('node_type', 'node_state', 'capacity', 'hostname', 'enabled')
)
for instance in instances:
self.instances_by_hostname[instance.hostname] = TaskManagerInstance(instance, **kwargs)
self.instances_by_hostname[instance.hostname] = TaskManagerInstance(instance)
def consume_capacity(self, task):
control_instance = self.instances_by_hostname.get(task.controller_node, '')
execution_instance = self.instances_by_hostname.get(task.execution_node, '')
if execution_instance and execution_instance.node_type in ('hybrid', 'execution'):
self.instances_by_hostname[task.execution_node].consume_capacity(task.task_impact, job_impact=True)
if control_instance and control_instance.node_type in ('hybrid', 'control'):
self.instances_by_hostname[task.controller_node].consume_capacity(self.control_task_impact)
# initialize remaining capacity based on currently waiting and running tasks
for task in active_tasks:
if task.status not in ['waiting', 'running']:
continue
control_instance = self.instances_by_hostname.get(task.controller_node, '')
execution_instance = self.instances_by_hostname.get(task.execution_node, '')
if execution_instance and execution_instance.node_type in ('hybrid', 'execution'):
self.instances_by_hostname[task.execution_node].consume_capacity(task.task_impact)
if control_instance and control_instance.node_type in ('hybrid', 'control'):
self.instances_by_hostname[task.controller_node].consume_capacity(settings.AWX_CONTROL_NODE_TASK_IMPACT)
def __getitem__(self, hostname):
return self.instances_by_hostname.get(hostname)
@@ -161,57 +64,42 @@ class TaskManagerInstances:
class TaskManagerInstanceGroups:
"""A class representing minimal data the task manager needs to represent all the InstanceGroups."""
"""A class representing minimal data the task manager needs to represent an InstanceGroup."""
def __init__(self, task_manager_instances=None, instance_groups=None, instance_groups_queryset=None, **kwargs):
def __init__(self, instances_by_hostname=None, instance_groups=None, instance_groups_queryset=None):
self.instance_groups = dict()
self.task_manager_instances = task_manager_instances if task_manager_instances is not None else TaskManagerInstances()
self.controlplane_ig = None
self.pk_ig_map = dict()
self.control_task_impact = kwargs.get('control_task_impact', settings.AWX_CONTROL_NODE_TASK_IMPACT)
self.controlplane_ig_name = kwargs.get('controlplane_ig_name', settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME)
if instance_groups is not None: # for testing
self.instance_groups = {ig.name: TaskManagerInstanceGroup(ig, self.task_manager_instances, **kwargs) for ig in instance_groups}
self.pk_ig_map = {ig.pk: ig for ig in instance_groups}
self.instance_groups = instance_groups
else:
if instance_groups_queryset is None:
instance_groups_queryset = InstanceGroup.objects.prefetch_related('instances').only(
'name', 'instances', 'max_concurrent_jobs', 'max_forks', 'is_container_group'
)
instance_groups_queryset = InstanceGroup.objects.prefetch_related('instances').only('name', 'instances')
for instance_group in instance_groups_queryset:
if instance_group.name == self.controlplane_ig_name:
if instance_group.name == settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME:
self.controlplane_ig = instance_group
self.instance_groups[instance_group.name] = TaskManagerInstanceGroup(instance_group, self.task_manager_instances, **kwargs)
self.instance_groups[instance_group.name] = dict(
instances=[
instances_by_hostname[instance.hostname] for instance in instance_group.instances.all() if instance.hostname in instances_by_hostname
],
)
self.pk_ig_map[instance_group.pk] = instance_group
def __getitem__(self, ig_name):
return self.instance_groups.get(ig_name)
def __contains__(self, ig_name):
return ig_name in self.instance_groups
def get_remaining_capacity(self, group_name):
return self.instance_groups[group_name].get_remaining_instance_capacity()
instances = self.instance_groups[group_name]['instances']
return sum(inst.remaining_capacity for inst in instances)
def get_consumed_capacity(self, group_name):
return self.instance_groups[group_name].get_consumed_capacity()
def get_jobs_running(self, group_name):
return self.instance_groups[group_name].get_jobs_running()
def get_capacity(self, group_name):
return self.instance_groups[group_name].get_capacity()
def get_instances(self, group_name):
return self.instance_groups[group_name].instances
instances = self.instance_groups[group_name]['instances']
return sum(inst.consumed_capacity for inst in instances)
def fit_task_to_most_remaining_capacity_instance(self, task, instance_group_name, impact=None, capacity_type=None, add_hybrid_control_cost=False):
impact = impact if impact else task.task_impact
capacity_type = capacity_type if capacity_type else task.capacity_type
instance_most_capacity = None
most_remaining_capacity = -1
instances = self.instance_groups[instance_group_name].instances
instances = self.instance_groups[instance_group_name]['instances']
for i in instances:
if i.node_type not in (capacity_type, 'hybrid'):
@@ -219,7 +107,7 @@ class TaskManagerInstanceGroups:
would_be_remaining = i.remaining_capacity - impact
# hybrid nodes _always_ control their own tasks
if add_hybrid_control_cost and i.node_type == 'hybrid':
would_be_remaining -= self.control_task_impact
would_be_remaining -= settings.AWX_CONTROL_NODE_TASK_IMPACT
if would_be_remaining >= 0 and (instance_most_capacity is None or would_be_remaining > most_remaining_capacity):
instance_most_capacity = i
most_remaining_capacity = would_be_remaining
@@ -227,13 +115,10 @@ class TaskManagerInstanceGroups:
def find_largest_idle_instance(self, instance_group_name, capacity_type='execution'):
largest_instance = None
instances = self.instance_groups[instance_group_name].instances
instances = self.instance_groups[instance_group_name]['instances']
for i in instances:
if i.node_type not in (capacity_type, 'hybrid'):
continue
if i.capacity <= 0:
# We don't want to select an idle instance with 0 capacity
continue
if (hasattr(i, 'jobs_running') and i.jobs_running == 0) or i.remaining_capacity == i.capacity:
if largest_instance is None:
largest_instance = i
@@ -254,56 +139,3 @@ class TaskManagerInstanceGroups:
logger.warn(f"No instance groups in cache exist, defaulting to global instance groups for task {task}")
return task.global_instance_groups
return igs
class TaskManagerModels:
    """Bundle the TaskManagerInstances and TaskManagerInstanceGroups views and keep their consumed capacity in step."""

    def __init__(self, **kwargs):
        """Build the instance and instance-group views, forwarding ``kwargs`` to both.

        ``control_task_impact`` and ``controlplane_ig_name`` default to the
        corresponding settings values when not supplied.
        """
        # We want to avoid calls to settings over and over in loops, so cache this information here.
        # dict.get()/setdefault() would evaluate the settings lookup even when the caller
        # supplied the value, so test for the key explicitly.
        if 'control_task_impact' not in kwargs:
            kwargs['control_task_impact'] = settings.AWX_CONTROL_NODE_TASK_IMPACT
        if 'controlplane_ig_name' not in kwargs:
            kwargs['controlplane_ig_name'] = settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME
        self.instances = TaskManagerInstances(**kwargs)
        self.instance_groups = TaskManagerInstanceGroups(task_manager_instances=self.instances, **kwargs)

    @classmethod
    def init_with_consumed_capacity(cls, **kwargs):
        """Construct the models and consume capacity for every active task.

        Tasks come from ``kwargs['tasks']`` when given; otherwise they are queried
        from ``UnifiedJob`` using ``task_status_filter_list`` (default running and
        waiting), restricted to ``instance_groups_queryset`` when one is passed.
        """
        tmm = cls(**kwargs)
        tasks = kwargs.get('tasks', None)

        if tasks is None:
            instance_group_queryset = kwargs.get('instance_groups_queryset', None)
            # No tasks were provided, so we will fetch them from the database
            task_status_filter_list = kwargs.get('task_status_filter_list', ['running', 'waiting'])
            task_fields = kwargs.get('task_fields', ('task_impact', 'controller_node', 'execution_node', 'instance_group'))
            from awx.main.models import UnifiedJob

            if instance_group_queryset is not None:
                logger.debug("******************INSTANCE GROUP QUERYSET PASSED -- FILTERING TASKS ****************************")
                # Sometimes things like the serializer pass a queryset that looks at not all instance groups. in this case,
                # we also need to filter the tasks we look at
                tasks = UnifiedJob.objects.filter(status__in=task_status_filter_list, instance_group__in=[ig.id for ig in instance_group_queryset]).only(
                    *task_fields
                )
            else:
                # No instance group query set, look at all tasks in whole system
                tasks = UnifiedJob.objects.filter(status__in=task_status_filter_list).only(*task_fields)

        for task in tasks:
            tmm.consume_capacity(task)

        return tmm

    def consume_capacity(self, task):
        """Record the capacity ``task`` consumes on its instances and, for container groups, on its instance group."""
        # Consume capacity on instances, which bubbles up to instance groups they are a member of
        self.instances.consume_capacity(task)

        # For container group jobs, additionally we must account for capacity consumed since
        # The container groups have no instances to look at to track how many jobs/forks are consumed
        if not task.instance_group_id:
            return

        pk_ig_map = self.instance_groups.pk_ig_map
        if task.instance_group_id not in pk_ig_map:
            # logger.warn is a deprecated alias; use logger.warning
            logger.warning(
                f"Task {task.log_format} assigned {task.instance_group_id} but this instance group not present in map of instance groups{pk_ig_map.keys()}"
            )
            return

        ig = pk_ig_map[task.instance_group_id]
        if ig.is_container_group:
            self.instance_groups[ig.name].consume_capacity(task)

View File

@@ -2,6 +2,8 @@ import json
import time
import logging
from collections import deque
import os
import stat
# Django
from django.conf import settings
@@ -204,6 +206,21 @@ class RunnerCallback:
self.instance = self.update_model(self.instance.pk, job_args=json.dumps(runner_config.command), job_cwd=runner_config.cwd, job_env=job_env)
# We opened a connection just for that save, close it here now
connections.close_all()
elif status_data['status'] == 'failed':
# For encrypted ssh_key_data, ansible-runner worker will open and write the
# ssh_key_data to a named pipe. Then, once the podman container starts, ssh-agent will
# read from this named pipe so that the key can be used in ansible-playbook.
# Once the podman container exits, the named pipe is deleted.
# However, if the podman container fails to start in the first place, e.g. the image
# name is incorrect, then this pipe is not cleaned up. Eventually ansible-runner
# processor will attempt to write artifacts to the private data dir via unstream_dir, requiring
# that it open this named pipe. This leads to a hang. Thus, before any artifacts
# are written by the processor, it's important to remove this ssh_key_data pipe.
private_data_dir = self.instance.job_env.get('AWX_PRIVATE_DATA_DIR', None)
if private_data_dir:
key_data_file = os.path.join(private_data_dir, 'artifacts', str(self.instance.id), 'ssh_key_data')
if os.path.exists(key_data_file) and stat.S_ISFIFO(os.stat(key_data_file).st_mode):
os.remove(key_data_file)
elif status_data['status'] == 'error':
result_traceback = status_data.get('result_traceback', None)
if result_traceback:

View File

@@ -390,7 +390,6 @@ class BaseTask(object):
logger.error("I/O error({0}) while trying to open lock file [{1}]: {2}".format(e.errno, lock_path, e.strerror))
raise
emitted_lockfile_log = False
start_time = time.time()
while True:
try:
@@ -402,9 +401,6 @@ class BaseTask(object):
logger.error("I/O error({0}) while trying to aquire lock on file [{1}]: {2}".format(e.errno, lock_path, e.strerror))
raise
else:
if not emitted_lockfile_log:
logger.info(f"exception acquiring lock {lock_path}: {e}")
emitted_lockfile_log = True
time.sleep(1.0)
self.instance.refresh_from_db(fields=['cancel_flag'])
if self.instance.cancel_flag or signal_callback():
@@ -430,7 +426,7 @@ class BaseTask(object):
"""
instance.log_lifecycle("post_run")
def final_run_hook(self, instance, status, private_data_dir):
def final_run_hook(self, instance, status, private_data_dir, fact_modification_times):
"""
Hook for any steps to run after job/task is marked as complete.
"""
@@ -473,6 +469,7 @@ class BaseTask(object):
self.instance = self.update_model(pk, status='running', start_args='') # blank field to remove encrypted passwords
self.instance.websocket_emit_status("running")
status, rc = 'error', None
fact_modification_times = {}
self.runner_callback.event_ct = 0
'''
@@ -501,6 +498,14 @@ class BaseTask(object):
if not os.path.exists(settings.AWX_ISOLATION_BASE_PATH):
raise RuntimeError('AWX_ISOLATION_BASE_PATH=%s does not exist' % settings.AWX_ISOLATION_BASE_PATH)
# Fetch "cached" fact data from prior runs and put on the disk
# where ansible expects to find it
if getattr(self.instance, 'use_fact_cache', False):
self.instance.start_job_fact_cache(
os.path.join(private_data_dir, 'artifacts', str(self.instance.id), 'fact_cache'),
fact_modification_times,
)
# May have to serialize the value
private_data_files, ssh_key_data = self.build_private_data_files(self.instance, private_data_dir)
passwords = self.build_passwords(self.instance, kwargs)
@@ -641,7 +646,7 @@ class BaseTask(object):
self.instance.send_notification_templates('succeeded' if status == 'successful' else 'failed')
try:
self.final_run_hook(self.instance, status, private_data_dir)
self.final_run_hook(self.instance, status, private_data_dir, fact_modification_times)
except Exception:
logger.exception('{} Final run hook errored.'.format(self.instance.log_format))
@@ -695,7 +700,7 @@ class SourceControlMixin(BaseTask):
def spawn_project_sync(self, project, sync_needs, scm_branch=None):
pu_ig = self.instance.instance_group
pu_en = Instance.objects.my_hostname()
pu_en = Instance.objects.me().hostname
sync_metafields = dict(
launch_type="sync",
@@ -762,10 +767,6 @@ class SourceControlMixin(BaseTask):
try:
original_branch = None
failed_reason = project.get_reason_if_failed()
if failed_reason:
self.update_model(self.instance.pk, status='failed', job_explanation=failed_reason)
raise RuntimeError(failed_reason)
project_path = project.get_project_path(check_if_exists=False)
if project.scm_type == 'git' and (scm_branch and scm_branch != project.scm_branch):
if os.path.exists(project_path):
@@ -1055,25 +1056,22 @@ class RunJob(SourceControlMixin, BaseTask):
error = _('Job could not start because no Execution Environment could be found.')
self.update_model(job.pk, status='error', job_explanation=error)
raise RuntimeError(error)
elif job.project.status in ('error', 'failed'):
msg = _('The project revision for this job template is unknown due to a failed update.')
job = self.update_model(job.pk, status='failed', job_explanation=msg)
raise RuntimeError(msg)
if job.inventory.kind == 'smart':
# cache smart inventory memberships so that the host_filter query is not
# ran inside of the event saving code
update_smart_memberships_for_inventory(job.inventory)
# Fetch "cached" fact data from prior runs and put on the disk
# where ansible expects to find it
if job.use_fact_cache:
self.facts_write_time = self.instance.start_job_fact_cache(os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'))
def build_project_dir(self, job, private_data_dir):
self.sync_and_copy(job.project, private_data_dir, scm_branch=job.scm_branch)
def post_run_hook(self, job, status):
super(RunJob, self).post_run_hook(job, status)
job.refresh_from_db(fields=['job_env'])
private_data_dir = job.job_env.get('AWX_PRIVATE_DATA_DIR')
if (not private_data_dir) or (not hasattr(self, 'facts_write_time')):
def final_run_hook(self, job, status, private_data_dir, fact_modification_times):
super(RunJob, self).final_run_hook(job, status, private_data_dir, fact_modification_times)
if not private_data_dir:
# If there's no private data dir, that means we didn't get into the
# actual `run()` call; this _usually_ means something failed in
# the pre_run_hook method
@@ -1081,11 +1079,9 @@ class RunJob(SourceControlMixin, BaseTask):
if job.use_fact_cache:
job.finish_job_fact_cache(
os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'),
self.facts_write_time,
fact_modification_times,
)
def final_run_hook(self, job, status, private_data_dir):
super(RunJob, self).final_run_hook(job, status, private_data_dir)
try:
inventory = job.inventory
except Inventory.DoesNotExist:

View File

@@ -61,15 +61,10 @@ def read_receptor_config():
return yaml.safe_load(f)
def work_signing_enabled(config_data):
for section in config_data:
if 'work-signing' in section:
return True
return False
def get_receptor_sockfile():
data = read_receptor_config()
def get_receptor_sockfile(config_data):
for section in config_data:
for section in data:
for entry_name, entry_data in section.items():
if entry_name == 'control-service':
if 'filename' in entry_data:
@@ -80,11 +75,12 @@ def get_receptor_sockfile(config_data):
raise RuntimeError(f'Receptor conf {__RECEPTOR_CONF} does not have control-service entry needed to get sockfile')
def get_tls_client(config_data, use_stream_tls=None):
def get_tls_client(use_stream_tls=None):
if not use_stream_tls:
return None
for section in config_data:
data = read_receptor_config()
for section in data:
for entry_name, entry_data in section.items():
if entry_name == 'tls-client':
if 'name' in entry_data:
@@ -92,12 +88,10 @@ def get_tls_client(config_data, use_stream_tls=None):
return None
def get_receptor_ctl(config_data=None):
if config_data is None:
config_data = read_receptor_config()
receptor_sockfile = get_receptor_sockfile(config_data)
def get_receptor_ctl():
receptor_sockfile = get_receptor_sockfile()
try:
return ReceptorControl(receptor_sockfile, config=__RECEPTOR_CONF, tlsclient=get_tls_client(config_data, True))
return ReceptorControl(receptor_sockfile, config=__RECEPTOR_CONF, tlsclient=get_tls_client(True))
except RuntimeError:
return ReceptorControl(receptor_sockfile)
@@ -165,18 +159,15 @@ def run_until_complete(node, timing_data=None, **kwargs):
"""
Runs an ansible-runner work_type on remote node, waits until it completes, then returns stdout.
"""
config_data = read_receptor_config()
receptor_ctl = get_receptor_ctl(config_data)
receptor_ctl = get_receptor_ctl()
use_stream_tls = getattr(get_conn_type(node, receptor_ctl), 'name', None) == "STREAMTLS"
kwargs.setdefault('tlsclient', get_tls_client(config_data, use_stream_tls))
kwargs.setdefault('tlsclient', get_tls_client(use_stream_tls))
kwargs.setdefault('ttl', '20s')
kwargs.setdefault('payload', '')
if work_signing_enabled(config_data):
kwargs['signwork'] = True
transmit_start = time.time()
result = receptor_ctl.submit_work(worktype='ansible-runner', node=node, **kwargs)
result = receptor_ctl.submit_work(worktype='ansible-runner', node=node, signwork=True, **kwargs)
unit_id = result['unitid']
run_start = time.time()
@@ -217,10 +208,7 @@ def run_until_complete(node, timing_data=None, **kwargs):
if state_name.lower() == 'failed':
work_detail = status.get('Detail', '')
if work_detail:
if stdout:
raise RemoteJobError(f'Receptor error from {node}, detail:\n{work_detail}\nstdout:\n{stdout}')
else:
raise RemoteJobError(f'Receptor error from {node}, detail:\n{work_detail}')
raise RemoteJobError(f'Receptor error from {node}, detail:\n{work_detail}')
else:
raise RemoteJobError(f'Unknown ansible-runner error on node {node}, stdout:\n{stdout}')
@@ -311,8 +299,7 @@ class AWXReceptorJob:
def run(self):
# We establish a connection to the Receptor socket
self.config_data = read_receptor_config()
receptor_ctl = get_receptor_ctl(self.config_data)
receptor_ctl = get_receptor_ctl()
res = None
try:
@@ -337,7 +324,7 @@ class AWXReceptorJob:
if self.work_type == 'ansible-runner':
work_submit_kw['node'] = self.task.instance.execution_node
use_stream_tls = get_conn_type(work_submit_kw['node'], receptor_ctl).name == "STREAMTLS"
work_submit_kw['tlsclient'] = get_tls_client(self.config_data, use_stream_tls)
work_submit_kw['tlsclient'] = get_tls_client(use_stream_tls)
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
transmitter_future = executor.submit(self.transmit, sockin)
@@ -411,11 +398,9 @@ class AWXReceptorJob:
unit_status = receptor_ctl.simple_command(f'work status {self.unit_id}')
detail = unit_status.get('Detail', None)
state_name = unit_status.get('StateName', None)
stdout_size = unit_status.get('StdoutSize', 0)
except Exception:
detail = ''
state_name = ''
stdout_size = 0
logger.exception(f'An error was encountered while getting status for work unit {self.unit_id}')
if 'exceeded quota' in detail:
@@ -426,16 +411,9 @@ class AWXReceptorJob:
return
try:
receptor_output = ''
if state_name == 'Failed' and self.task.runner_callback.event_ct == 0:
# if receptor work unit failed and no events were emitted, work results may
# contain useful information about why the job failed. In case stdout is
# massive, only ask for last 1000 bytes
startpos = max(stdout_size - 1000, 0)
resultsock, resultfile = receptor_ctl.get_work_results(self.unit_id, startpos=startpos, return_socket=True, return_sockfile=True)
resultsock.setblocking(False) # this makes resultfile reads non blocking
lines = resultfile.readlines()
receptor_output = b"".join(lines).decode()
resultsock = receptor_ctl.get_work_results(self.unit_id, return_sockfile=True)
lines = resultsock.readlines()
receptor_output = b"".join(lines).decode()
if receptor_output:
self.task.runner_callback.delay_update(result_traceback=receptor_output)
elif detail:
@@ -496,9 +474,7 @@ class AWXReceptorJob:
@property
def sign_work(self):
if self.work_type in ('ansible-runner', 'local'):
return work_signing_enabled(self.config_data)
return False
return True if self.work_type in ('ansible-runner', 'local') else False
@property
def work_type(self):

View File

@@ -52,7 +52,6 @@ from awx.main.constants import ACTIVE_STATES
from awx.main.dispatch.publish import task
from awx.main.dispatch import get_local_queuename, reaper
from awx.main.utils.common import (
get_type_for_model,
ignore_inventory_computed_fields,
ignore_inventory_group_removal,
ScheduleWorkflowManager,
@@ -721,43 +720,45 @@ def handle_work_success(task_actual):
@task(queue=get_local_queuename)
def handle_work_error(task_actual):
try:
instance = UnifiedJob.get_instance_by_type(task_actual['type'], task_actual['id'])
except ObjectDoesNotExist:
logger.warning('Missing {} `{}` in error callback.'.format(task_actual['type'], task_actual['id']))
return
if not instance:
return
def handle_work_error(task_id, *args, **kwargs):
subtasks = kwargs.get('subtasks', None)
logger.debug('Executing error task id %s, subtasks: %s' % (task_id, str(subtasks)))
first_instance = None
first_instance_type = ''
if subtasks is not None:
for each_task in subtasks:
try:
instance = UnifiedJob.get_instance_by_type(each_task['type'], each_task['id'])
if not instance:
# Unknown task type
logger.warning("Unknown task type: {}".format(each_task['type']))
continue
except ObjectDoesNotExist:
logger.warning('Missing {} `{}` in error callback.'.format(each_task['type'], each_task['id']))
continue
subtasks = instance.get_jobs_fail_chain() # reverse of dependent_jobs mostly
logger.debug(f'Executing error task id {task_actual["id"]}, subtasks: {[subtask.id for subtask in subtasks]}')
if first_instance is None:
first_instance = instance
first_instance_type = each_task['type']
deps_of_deps = {}
for subtask in subtasks:
if subtask.celery_task_id != instance.celery_task_id and not subtask.cancel_flag and not subtask.status in ('successful', 'failed'):
# If there are multiple in the dependency chain, A->B->C, and this was called for A, blame B for clarity
blame_job = deps_of_deps.get(subtask.id, instance)
subtask.status = 'failed'
subtask.failed = True
if not subtask.job_explanation:
subtask.job_explanation = 'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (
get_type_for_model(type(blame_job)),
blame_job.name,
blame_job.id,
)
subtask.save()
subtask.websocket_emit_status("failed")
for sub_subtask in subtask.get_jobs_fail_chain():
deps_of_deps[sub_subtask.id] = subtask
if instance.celery_task_id != task_id and not instance.cancel_flag and not instance.status in ('successful', 'failed'):
instance.status = 'failed'
instance.failed = True
if not instance.job_explanation:
instance.job_explanation = 'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (
first_instance_type,
first_instance.name,
first_instance.id,
)
instance.save()
instance.websocket_emit_status("failed")
# We only send 1 job complete message since all the job completion message
# handling does is trigger the scheduler. If we extend the functionality of
# what the job complete message handler does then we may want to send a
# completion event for each job here.
schedule_manager_success_or_error(instance)
if first_instance:
schedule_manager_success_or_error(first_instance)
@task(queue=get_local_queuename)

View File

@@ -3,6 +3,5 @@
"ANSIBLE_TRANSFORM_INVALID_GROUP_CHARS": "never",
"AWS_ACCESS_KEY_ID": "fooo",
"AWS_SECRET_ACCESS_KEY": "fooo",
"AWS_SECURITY_TOKEN": "fooo",
"AWS_SESSION_TOKEN": "fooo"
"AWS_SECURITY_TOKEN": "fooo"
}

View File

@@ -4,10 +4,8 @@ from awx.api.versioning import reverse
from awx.main.models.activity_stream import ActivityStream
from awx.main.models.ha import Instance
from django.test.utils import override_settings
INSTANCE_KWARGS = dict(hostname='example-host', cpu=6, node_type='execution', memory=36000000000, cpu_capacity=6, mem_capacity=42)
INSTANCE_KWARGS = dict(hostname='example-host', cpu=6, memory=36000000000, cpu_capacity=6, mem_capacity=42)
@pytest.mark.django_db
@@ -56,33 +54,3 @@ def test_health_check_usage(get, post, admin_user):
get(url=url, user=admin_user, expect=200)
r = post(url=url, user=admin_user, expect=200)
assert r.data['msg'] == f"Health check is running for {instance.hostname}."
def test_custom_hostname_regex(post, admin_user):
url = reverse('api:instance_list')
with override_settings(IS_K8S=True):
for value in [
("foo.bar.baz", 201),
("f.bar.bz", 201),
("foo.bar.b", 400),
("a.b.c", 400),
("localhost", 400),
("127.0.0.1", 400),
("192.168.56.101", 201),
("2001:0db8:85a3:0000:0000:8a2e:0370:7334", 201),
("foobar", 201),
("--yoooo", 400),
("$3$@foobar@#($!@#*$", 400),
("999.999.999.999", 201),
("0000:0000:0000:0000:0000:0000:0000:0001", 400),
("whitespaces are bad for hostnames", 400),
("0:0:0:0:0:0:0:1", 400),
("192.localhost.domain.101", 201),
("F@$%(@#$H%^(I@#^HCTQEWRFG", 400),
]:
data = {
"hostname": value[0],
"node_type": "execution",
"node_state": "installed",
}
post(url=url, user=admin_user, data=data, expect=value[1])

View File

@@ -216,7 +216,7 @@ def test_instance_attach_to_instance_group(post, instance_group, node_type_insta
count = ActivityStream.objects.count()
url = reverse('api:instance_group_instance_list', kwargs={'pk': instance_group.pk})
url = reverse(f'api:instance_group_instance_list', kwargs={'pk': instance_group.pk})
post(url, {'associate': True, 'id': instance.id}, admin, expect=204 if node_type != 'control' else 400)
new_activity = ActivityStream.objects.all()[count:]
@@ -240,7 +240,7 @@ def test_instance_unattach_from_instance_group(post, instance_group, node_type_i
count = ActivityStream.objects.count()
url = reverse('api:instance_group_instance_list', kwargs={'pk': instance_group.pk})
url = reverse(f'api:instance_group_instance_list', kwargs={'pk': instance_group.pk})
post(url, {'disassociate': True, 'id': instance.id}, admin, expect=204 if node_type != 'control' else 400)
new_activity = ActivityStream.objects.all()[count:]
@@ -263,7 +263,7 @@ def test_instance_group_attach_to_instance(post, instance_group, node_type_insta
count = ActivityStream.objects.count()
url = reverse('api:instance_instance_groups_list', kwargs={'pk': instance.pk})
url = reverse(f'api:instance_instance_groups_list', kwargs={'pk': instance.pk})
post(url, {'associate': True, 'id': instance_group.id}, admin, expect=204 if node_type != 'control' else 400)
new_activity = ActivityStream.objects.all()[count:]
@@ -287,7 +287,7 @@ def test_instance_group_unattach_from_instance(post, instance_group, node_type_i
count = ActivityStream.objects.count()
url = reverse('api:instance_instance_groups_list', kwargs={'pk': instance.pk})
url = reverse(f'api:instance_instance_groups_list', kwargs={'pk': instance.pk})
post(url, {'disassociate': True, 'id': instance_group.id}, admin, expect=204 if node_type != 'control' else 400)
new_activity = ActivityStream.objects.all()[count:]
@@ -314,4 +314,4 @@ def test_cannot_remove_controlplane_hybrid_instances(post, controlplane_instance
url = reverse('api:instance_instance_groups_list', kwargs={'pk': instance.pk})
r = post(url, {'disassociate': True, 'id': controlplane_instance_group.id}, admin_user, expect=400)
assert 'Cannot disassociate hybrid instance' in str(r.data)
assert f'Cannot disassociate hybrid instance' in str(r.data)

View File

@@ -105,30 +105,6 @@ def test_encrypted_survey_answer(post, patch, admin_user, project, inventory, su
assert decrypt_value(get_encryption_key('value', pk=None), schedule.extra_data['var1']) == 'bar'
@pytest.mark.django_db
def test_survey_password_default(post, patch, admin_user, project, inventory, survey_spec_factory):
job_template = JobTemplate.objects.create(
name='test-jt',
project=project,
playbook='helloworld.yml',
inventory=inventory,
ask_variables_on_launch=False,
survey_enabled=True,
survey_spec=survey_spec_factory([{'variable': 'var1', 'question_name': 'Q1', 'type': 'password', 'required': True, 'default': 'foobar'}]),
)
# test removal of $encrypted$
url = reverse('api:job_template_schedules_list', kwargs={'pk': job_template.id})
r = post(url, {'name': 'test sch', 'rrule': RRULE_EXAMPLE, 'extra_data': '{"var1": "$encrypted$"}'}, admin_user, expect=201)
schedule = Schedule.objects.get(pk=r.data['id'])
assert schedule.extra_data == {}
assert schedule.enabled is True
# test an unrelated change
patch(schedule.get_absolute_url(), data={'enabled': False}, user=admin_user, expect=200)
patch(schedule.get_absolute_url(), data={'enabled': True}, user=admin_user, expect=200)
@pytest.mark.django_db
@pytest.mark.parametrize(
'rrule, error',
@@ -147,19 +123,19 @@ def test_survey_password_default(post, patch, admin_user, project, inventory, su
("DTSTART:20030925T104941Z RRULE:FREQ=DAILY;INTERVAL=10;COUNT=500;UNTIL=20040925T104941Z", "RRULE may not contain both COUNT and UNTIL"), # noqa
("DTSTART:20300308T050000Z RRULE:FREQ=DAILY;INTERVAL=1;COUNT=2000", "COUNT > 999 is unsupported"), # noqa
# Individual rule test with multiple rules
# Bad Rule: RRULE:NONSENSE
## Bad Rule: RRULE:NONSENSE
("DTSTART:20300308T050000Z RRULE:NONSENSE RRULE:INTERVAL=1;FREQ=DAILY EXRULE:FREQ=WEEKLY;INTERVAL=1;BYDAY=SU", "INTERVAL required in rrule"),
# Bad Rule: RRULE:FREQ=YEARLY;INTERVAL=1;BYDAY=5MO
## Bad Rule: RRULE:FREQ=YEARLY;INTERVAL=1;BYDAY=5MO
(
"DTSTART:20300308T050000Z RRULE:INTERVAL=1;FREQ=DAILY EXRULE:FREQ=WEEKLY;INTERVAL=1;BYDAY=SU RRULE:FREQ=YEARLY;INTERVAL=1;BYDAY=5MO",
"BYDAY with numeric prefix not supported",
), # noqa
# Bad Rule: RRULE:FREQ=DAILY;INTERVAL=10;COUNT=500;UNTIL=20040925T104941Z
## Bad Rule: RRULE:FREQ=DAILY;INTERVAL=10;COUNT=500;UNTIL=20040925T104941Z
(
"DTSTART:20030925T104941Z RRULE:INTERVAL=1;FREQ=DAILY EXRULE:FREQ=WEEKLY;INTERVAL=1;BYDAY=SU RRULE:FREQ=DAILY;INTERVAL=10;COUNT=500;UNTIL=20040925T104941Z",
"RRULE may not contain both COUNT and UNTIL",
), # noqa
# Bad Rule: RRULE:FREQ=DAILY;INTERVAL=1;COUNT=2000
## Bad Rule: RRULE:FREQ=DAILY;INTERVAL=1;COUNT=2000
(
"DTSTART:20300308T050000Z RRULE:INTERVAL=1;FREQ=DAILY EXRULE:FREQ=WEEKLY;INTERVAL=1;BYDAY=SU RRULE:FREQ=DAILY;INTERVAL=1;COUNT=2000",
"COUNT > 999 is unsupported",

View File

@@ -1,15 +1,7 @@
import pytest
import time
from unittest import mock
from uuid import uuid4
from django.test import TransactionTestCase
from awx.main.dispatch.worker.callback import job_stats_wrapup, CallbackBrokerWorker
from awx.main.dispatch.worker.callback import job_stats_wrapup
from awx.main.models.jobs import Job
from awx.main.models.inventory import InventoryUpdate, InventorySource
from awx.main.models.events import InventoryUpdateEvent
@pytest.mark.django_db
@@ -32,108 +24,3 @@ def test_wrapup_does_send_notifications(mocker):
job.refresh_from_db()
assert job.host_status_counts == {}
mock.assert_called_once_with('succeeded')
class FakeRedis:
def keys(self, *args, **kwargs):
return []
def set(self):
pass
def get(self):
return None
@classmethod
def from_url(cls, *args, **kwargs):
return cls()
def pipeline(self):
return self
class TestCallbackBrokerWorker(TransactionTestCase):
@pytest.fixture(autouse=True)
def turn_off_websockets(self):
with mock.patch('awx.main.dispatch.worker.callback.emit_event_detail', lambda *a, **kw: None):
yield
def get_worker(self):
with mock.patch('redis.Redis', new=FakeRedis): # turn off redis stuff
return CallbackBrokerWorker()
def event_create_kwargs(self):
inventory_update = InventoryUpdate.objects.create(source='file', inventory_source=InventorySource.objects.create(source='file'))
return dict(inventory_update=inventory_update, created=inventory_update.created)
def test_flush_with_valid_event(self):
worker = self.get_worker()
events = [InventoryUpdateEvent(uuid=str(uuid4()), **self.event_create_kwargs())]
worker.buff = {InventoryUpdateEvent: events}
worker.flush()
assert worker.buff.get(InventoryUpdateEvent, []) == []
assert InventoryUpdateEvent.objects.filter(uuid=events[0].uuid).count() == 1
def test_flush_with_invalid_event(self):
worker = self.get_worker()
kwargs = self.event_create_kwargs()
events = [
InventoryUpdateEvent(uuid=str(uuid4()), stdout='good1', **kwargs),
InventoryUpdateEvent(uuid=str(uuid4()), stdout='bad', counter=-2, **kwargs),
InventoryUpdateEvent(uuid=str(uuid4()), stdout='good2', **kwargs),
]
worker.buff = {InventoryUpdateEvent: events.copy()}
worker.flush()
assert InventoryUpdateEvent.objects.filter(uuid=events[0].uuid).count() == 1
assert InventoryUpdateEvent.objects.filter(uuid=events[1].uuid).count() == 0
assert InventoryUpdateEvent.objects.filter(uuid=events[2].uuid).count() == 1
assert worker.buff == {InventoryUpdateEvent: [events[1]]}
def test_duplicate_key_not_saved_twice(self):
worker = self.get_worker()
events = [InventoryUpdateEvent(uuid=str(uuid4()), **self.event_create_kwargs())]
worker.buff = {InventoryUpdateEvent: events.copy()}
worker.flush()
# put current saved event in buffer (error case)
worker.buff = {InventoryUpdateEvent: [InventoryUpdateEvent.objects.get(uuid=events[0].uuid)]}
worker.last_flush = time.time() - 2.0
# here, the bulk_create will fail with UNIQUE constraint violation, but individual saves should resolve it
worker.flush()
assert InventoryUpdateEvent.objects.filter(uuid=events[0].uuid).count() == 1
assert worker.buff.get(InventoryUpdateEvent, []) == []
def test_give_up_on_bad_event(self):
worker = self.get_worker()
events = [InventoryUpdateEvent(uuid=str(uuid4()), counter=-2, **self.event_create_kwargs())]
worker.buff = {InventoryUpdateEvent: events.copy()}
for i in range(5):
worker.last_flush = time.time() - 2.0
worker.flush()
# Could not save, should be logged, and buffer should be cleared
assert worker.buff.get(InventoryUpdateEvent, []) == []
assert InventoryUpdateEvent.objects.filter(uuid=events[0].uuid).count() == 0 # sanity
def test_postgres_invalid_NUL_char(self):
# In postgres, text fields reject NUL character, 0x00
# tests use sqlite3 which will not raise an error
# but we can still test that it is sanitized before saving
worker = self.get_worker()
kwargs = self.event_create_kwargs()
events = [InventoryUpdateEvent(uuid=str(uuid4()), stdout="\x00", **kwargs)]
assert "\x00" in events[0].stdout # sanity
worker.buff = {InventoryUpdateEvent: events.copy()}
with mock.patch.object(InventoryUpdateEvent.objects, 'bulk_create', side_effect=ValueError):
with mock.patch.object(events[0], 'save', side_effect=ValueError):
worker.flush()
assert "\x00" not in events[0].stdout
worker.last_flush = time.time() - 2.0
worker.flush()
event = InventoryUpdateEvent.objects.get(uuid=events[0].uuid)
assert "\x00" not in event.stdout

View File

@@ -171,17 +171,13 @@ class TestKeyRegeneration:
def test_use_custom_key_with_empty_tower_secret_key_env_var(self):
os.environ['TOWER_SECRET_KEY'] = ''
with pytest.raises(SystemExit) as e:
call_command('regenerate_secret_key', '--use-custom-key')
assert e.type == SystemExit
assert e.value.code == 1
new_key = call_command('regenerate_secret_key', '--use-custom-key')
assert settings.SECRET_KEY != new_key
def test_use_custom_key_with_no_tower_secret_key_env_var(self):
os.environ.pop('TOWER_SECRET_KEY', None)
with pytest.raises(SystemExit) as e:
call_command('regenerate_secret_key', '--use-custom-key')
assert e.type == SystemExit
assert e.value.code == 1
new_key = call_command('regenerate_secret_key', '--use-custom-key')
assert settings.SECRET_KEY != new_key
def test_with_tower_secret_key_env_var(self):
custom_key = 'MXSq9uqcwezBOChl/UfmbW1k4op+bC+FQtwPqgJ1u9XV'

View File

@@ -4,7 +4,7 @@ from awx.main.models import (
Instance,
InstanceGroup,
)
from awx.main.scheduler.task_manager_models import TaskManagerInstanceGroups
from awx.main.scheduler.task_manager_models import TaskManagerInstanceGroups, TaskManagerInstances
class TestInstanceGroupInstanceMapping(TransactionTestCase):
@@ -23,10 +23,11 @@ class TestInstanceGroupInstanceMapping(TransactionTestCase):
def test_mapping(self):
self.sample_cluster()
with self.assertNumQueries(3):
instance_groups = TaskManagerInstanceGroups()
instances = TaskManagerInstances([]) # empty task list
instance_groups = TaskManagerInstanceGroups(instances_by_hostname=instances)
ig_instance_map = instance_groups.instance_groups
assert set(i.hostname for i in ig_instance_map['ig_small'].instances) == set(['i1'])
assert set(i.hostname for i in ig_instance_map['ig_large'].instances) == set(['i2', 'i3'])
assert set(i.hostname for i in ig_instance_map['default'].instances) == set(['i2'])
assert set(i.hostname for i in ig_instance_map['ig_small']['instances']) == set(['i1'])
assert set(i.hostname for i in ig_instance_map['ig_large']['instances']) == set(['i2', 'i3'])
assert set(i.hostname for i in ig_instance_map['default']['instances']) == set(['i2'])

View File

@@ -10,10 +10,6 @@ from awx.main.utils import (
create_temporary_fifo,
)
from awx.main.scheduler import TaskManager
from . import create_job
@pytest.fixture
def containerized_job(default_instance_group, kube_credential, job_template_factory):
@@ -38,50 +34,6 @@ def test_containerized_job(containerized_job):
assert containerized_job.instance_group.credential.kubernetes
@pytest.mark.django_db
def test_max_concurrent_jobs_blocks_start_of_new_jobs(controlplane_instance_group, containerized_job, mocker):
"""Construct a scenario where only 1 job will fit within the max_concurrent_jobs of the container group.
Since max_concurrent_jobs is set to 1, even though 2 jobs are in pending
and would be launched into the container group, only one will be started.
"""
containerized_job.unified_job_template.allow_simultaneous = True
containerized_job.unified_job_template.save()
default_instance_group = containerized_job.instance_group
default_instance_group.max_concurrent_jobs = 1
default_instance_group.save()
task_impact = 1
# Create a second job that should not be scheduled at first, blocked by the other
create_job(containerized_job.unified_job_template)
tm = TaskManager()
with mock.patch('awx.main.models.Job.task_impact', new_callable=mock.PropertyMock) as mock_task_impact:
mock_task_impact.return_value = task_impact
with mock.patch.object(TaskManager, "start_task", wraps=tm.start_task) as mock_job:
tm.schedule()
mock_job.assert_called_once()
@pytest.mark.django_db
def test_max_forks_blocks_start_of_new_jobs(controlplane_instance_group, containerized_job, mocker):
"""Construct a scenario where only 1 job will fit within the max_forks of the container group.
In this case, we set the container_group max_forks to 10, and make the task_impact of a job 6.
Therefore, only 1 job will fit within the max of 10.
"""
containerized_job.unified_job_template.allow_simultaneous = True
containerized_job.unified_job_template.save()
default_instance_group = containerized_job.instance_group
default_instance_group.max_forks = 10
# Create a second job that should not be scheduled
create_job(containerized_job.unified_job_template)
tm = TaskManager()
with mock.patch('awx.main.models.Job.task_impact', new_callable=mock.PropertyMock) as mock_task_impact:
mock_task_impact.return_value = 6
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
tm.schedule()
tm.start_task.assert_called_once()
@pytest.mark.django_db
def test_kubectl_ssl_verification(containerized_job, default_job_execution_environment):
containerized_job.execution_environment = default_job_execution_environment

View File

@@ -23,7 +23,7 @@ def test_multi_group_basic_job_launch(instance_factory, controlplane_instance_gr
mock_task_impact.return_value = 500
with mocker.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_has_calls([mock.call(j1, ig1, i1), mock.call(j2, ig2, i2)])
TaskManager.start_task.assert_has_calls([mock.call(j1, ig1, [], i1), mock.call(j2, ig2, [], i2)])
@pytest.mark.django_db
@@ -54,7 +54,7 @@ def test_multi_group_with_shared_dependency(instance_factory, controlplane_insta
DependencyManager().schedule()
TaskManager().schedule()
pu = p.project_updates.first()
TaskManager.start_task.assert_called_once_with(pu, controlplane_instance_group, controlplane_instance_group.instances.all()[0])
TaskManager.start_task.assert_called_once_with(pu, controlplane_instance_group, [j1, j2], controlplane_instance_group.instances.all()[0])
pu.finished = pu.created + timedelta(seconds=1)
pu.status = "successful"
pu.save()
@@ -62,8 +62,8 @@ def test_multi_group_with_shared_dependency(instance_factory, controlplane_insta
DependencyManager().schedule()
TaskManager().schedule()
TaskManager.start_task.assert_any_call(j1, ig1, i1)
TaskManager.start_task.assert_any_call(j2, ig2, i2)
TaskManager.start_task.assert_any_call(j1, ig1, [], i1)
TaskManager.start_task.assert_any_call(j2, ig2, [], i2)
assert TaskManager.start_task.call_count == 2
@@ -75,7 +75,7 @@ def test_workflow_job_no_instancegroup(workflow_job_template_factory, controlpla
wfj.save()
with mocker.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(wfj, None, None)
TaskManager.start_task.assert_called_once_with(wfj, None, [], None)
assert wfj.instance_group is None
@@ -150,7 +150,7 @@ def test_failover_group_run(instance_factory, controlplane_instance_group, mocke
mock_task_impact.return_value = 500
with mock.patch.object(TaskManager, "start_task", wraps=tm.start_task) as mock_job:
tm.schedule()
mock_job.assert_has_calls([mock.call(j1, ig1, i1), mock.call(j1_1, ig2, i2)])
mock_job.assert_has_calls([mock.call(j1, ig1, [], i1), mock.call(j1_1, ig2, [], i2)])
assert mock_job.call_count == 2

View File

@@ -18,7 +18,7 @@ def test_single_job_scheduler_launch(hybrid_instance, controlplane_instance_grou
j = create_job(objects.job_template)
with mocker.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, instance)
TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, [], instance)
@pytest.mark.django_db
@@ -240,82 +240,12 @@ def test_multi_jt_capacity_blocking(hybrid_instance, job_template_factory, mocke
mock_task_impact.return_value = 505
with mock.patch.object(TaskManager, "start_task", wraps=tm.start_task) as mock_job:
tm.schedule()
mock_job.assert_called_once_with(j1, controlplane_instance_group, instance)
mock_job.assert_called_once_with(j1, controlplane_instance_group, [], instance)
j1.status = "successful"
j1.save()
with mock.patch.object(TaskManager, "start_task", wraps=tm.start_task) as mock_job:
tm.schedule()
mock_job.assert_called_once_with(j2, controlplane_instance_group, instance)
@pytest.mark.django_db
def test_max_concurrent_jobs_ig_capacity_blocking(hybrid_instance, job_template_factory, mocker):
"""When max_concurrent_jobs of an instance group is more restrictive than capacity of instances, enforce max_concurrent_jobs."""
instance = hybrid_instance
controlplane_instance_group = instance.rampart_groups.first()
# We will expect only 1 job to be started
controlplane_instance_group.max_concurrent_jobs = 1
controlplane_instance_group.save()
num_jobs = 3
jobs = []
# Each job gets its own template, organization, project, inventory and credential names.
for i in range(num_jobs):
jobs.append(
create_job(job_template_factory(f'jt{i}', organization=f'org{i}', project=f'proj{i}', inventory=f'inv{i}', credential=f'cred{i}').job_template)
)
# NOTE(review): tm is rebound a few lines below before this instance is used;
# one of the two TaskManager() constructions looks redundant - confirm and drop.
tm = TaskManager()
task_impact = 1
# Sanity check that multiple jobs would run if not for the max_concurrent_jobs setting.
assert task_impact * num_jobs < controlplane_instance_group.capacity
tm = TaskManager()
# Pass 1: every job reports task_impact == 1; start_task is wrapped (not replaced)
# so calls are recorded while the real implementation still runs.
with mock.patch('awx.main.models.Job.task_impact', new_callable=mock.PropertyMock) as mock_task_impact:
mock_task_impact.return_value = task_impact
with mock.patch.object(TaskManager, "start_task", wraps=tm.start_task) as mock_job:
tm.schedule()
mock_job.assert_called_once()
# Record jobs[0] as running on this instance within the control plane group.
jobs[0].status = 'running'
jobs[0].controller_node = instance.hostname
jobs[0].execution_node = instance.hostname
jobs[0].instance_group = controlplane_instance_group
jobs[0].save()
# while that job is running, we should not start another job
with mock.patch('awx.main.models.Job.task_impact', new_callable=mock.PropertyMock) as mock_task_impact:
mock_task_impact.return_value = task_impact
with mock.patch.object(TaskManager, "start_task", wraps=tm.start_task) as mock_job:
tm.schedule()
mock_job.assert_not_called()
# now job is done, we should start one of the two other jobs
jobs[0].status = 'successful'
jobs[0].save()
# Pass 3: same patches as before; exactly one more start_task call is expected.
with mock.patch('awx.main.models.Job.task_impact', new_callable=mock.PropertyMock) as mock_task_impact:
mock_task_impact.return_value = task_impact
with mock.patch.object(TaskManager, "start_task", wraps=tm.start_task) as mock_job:
tm.schedule()
mock_job.assert_called_once()
@pytest.mark.django_db
def test_max_forks_ig_capacity_blocking(hybrid_instance, job_template_factory, mocker):
    """Only one job may start when the group's max_forks is below the combined task impact.

    The group is capped at 15 forks while two jobs of impact 10 each are
    pending; a single scheduling pass must call start_task exactly once.
    """
    instance = hybrid_instance
    controlplane_instance_group = instance.rampart_groups.first()
    controlplane_instance_group.max_forks = 15
    controlplane_instance_group.save()

    task_impact = 10
    num_jobs = 2

    # Sanity check that 2 jobs would run if not for the max_forks setting.
    assert controlplane_instance_group.max_forks < controlplane_instance_group.capacity
    assert task_impact * num_jobs > controlplane_instance_group.max_forks
    assert task_impact * num_jobs < controlplane_instance_group.capacity

    for idx in range(num_jobs):
        template_objects = job_template_factory(
            f'jt{idx}', organization=f'org{idx}', project=f'proj{idx}', inventory=f'inv{idx}', credential=f'cred{idx}'
        )
        create_job(template_objects.job_template)

    task_manager = TaskManager()
    impact_patch = mock.patch('awx.main.models.Job.task_impact', new_callable=mock.PropertyMock)
    with impact_patch as mock_task_impact:
        mock_task_impact.return_value = task_impact
        # wraps= keeps the real start_task running while recording the calls.
        with mock.patch.object(TaskManager, "start_task", wraps=task_manager.start_task) as mock_job:
            task_manager.schedule()
            mock_job.assert_called_once()
mock_job.assert_called_once_with(j2, controlplane_instance_group, [], instance)
@pytest.mark.django_db
@@ -337,12 +267,12 @@ def test_single_job_dependencies_project_launch(controlplane_instance_group, job
pu = [x for x in p.project_updates.all()]
assert len(pu) == 1
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(pu[0], controlplane_instance_group, instance)
TaskManager.start_task.assert_called_once_with(pu[0], controlplane_instance_group, [j], instance)
pu[0].status = "successful"
pu[0].save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, instance)
TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, [], instance)
@pytest.mark.django_db
@@ -365,12 +295,12 @@ def test_single_job_dependencies_inventory_update_launch(controlplane_instance_g
iu = [x for x in ii.inventory_updates.all()]
assert len(iu) == 1
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(iu[0], controlplane_instance_group, instance)
TaskManager.start_task.assert_called_once_with(iu[0], controlplane_instance_group, [j], instance)
iu[0].status = "successful"
iu[0].save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, instance)
TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, [], instance)
@pytest.mark.django_db
@@ -412,7 +342,7 @@ def test_job_dependency_with_already_updated(controlplane_instance_group, job_te
mock_iu.assert_not_called()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, instance)
TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, [], instance)
@pytest.mark.django_db
@@ -442,7 +372,9 @@ def test_shared_dependencies_launch(controlplane_instance_group, job_template_fa
TaskManager().schedule()
pu = p.project_updates.first()
iu = ii.inventory_updates.first()
TaskManager.start_task.assert_has_calls([mock.call(iu, controlplane_instance_group, instance), mock.call(pu, controlplane_instance_group, instance)])
TaskManager.start_task.assert_has_calls(
[mock.call(iu, controlplane_instance_group, [j1, j2], instance), mock.call(pu, controlplane_instance_group, [j1, j2], instance)]
)
pu.status = "successful"
pu.finished = pu.created + timedelta(seconds=1)
pu.save()
@@ -451,7 +383,9 @@ def test_shared_dependencies_launch(controlplane_instance_group, job_template_fa
iu.save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_has_calls([mock.call(j1, controlplane_instance_group, instance), mock.call(j2, controlplane_instance_group, instance)])
TaskManager.start_task.assert_has_calls(
[mock.call(j1, controlplane_instance_group, [], instance), mock.call(j2, controlplane_instance_group, [], instance)]
)
pu = [x for x in p.project_updates.all()]
iu = [x for x in ii.inventory_updates.all()]
assert len(pu) == 1
@@ -475,7 +409,7 @@ def test_job_not_blocking_project_update(controlplane_instance_group, job_templa
project_update.status = "pending"
project_update.save()
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(project_update, controlplane_instance_group, instance)
TaskManager.start_task.assert_called_once_with(project_update, controlplane_instance_group, [], instance)
@pytest.mark.django_db
@@ -499,7 +433,7 @@ def test_job_not_blocking_inventory_update(controlplane_instance_group, job_temp
DependencyManager().schedule()
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(inventory_update, controlplane_instance_group, instance)
TaskManager.start_task.assert_called_once_with(inventory_update, controlplane_instance_group, [], instance)
@pytest.mark.django_db

View File

@@ -1,7 +1,7 @@
import pytest
from unittest import mock
from awx.main.models import AdHocCommand, InventoryUpdate, JobTemplate, Job
from awx.main.models import AdHocCommand, InventoryUpdate, JobTemplate
from awx.main.models.activity_stream import ActivityStream
from awx.main.models.ha import Instance, InstanceGroup
from awx.main.tasks.system import apply_cluster_membership_policies
@@ -15,24 +15,6 @@ def test_default_tower_instance_group(default_instance_group, job_factory):
assert default_instance_group in job_factory().preferred_instance_groups
@pytest.mark.django_db
@pytest.mark.parametrize('node_type', ('execution', 'control'))
@pytest.mark.parametrize('active', (True, False))
def test_get_cleanup_task_kwargs_active_jobs(node_type, active):
    """Cleanup kwargs exclude the job's 'awx_<pk>_' prefix only while the job is running.

    The job is tied to the instance through controller_node for control nodes
    and through execution_node otherwise.
    """
    instance = Instance.objects.create(hostname='foobar', node_type=node_type)
    node_field = 'controller_node' if node_type == 'control' else 'execution_node'
    job_status = 'running' if active else 'successful'
    job = Job.objects.create(**{node_field: instance.hostname, 'status': job_status})

    kwargs = instance.get_cleanup_task_kwargs()

    if not active:
        assert 'exclude_strings' not in kwargs
        return
    assert kwargs['exclude_strings'] == [f'awx_{job.pk}_']
@pytest.mark.django_db
class TestPolicyTaskScheduling:
"""Tests make assertions about when the policy task gets scheduled"""

View File

@@ -121,8 +121,8 @@ def test_python_and_js_licenses():
return errors
base_dir = settings.BASE_DIR
api_licenses = index_licenses('%s/../licenses' % base_dir)
ui_licenses = index_licenses('%s/../licenses/ui' % base_dir)
api_licenses = index_licenses('%s/../docs/licenses' % base_dir)
ui_licenses = index_licenses('%s/../docs/licenses/ui' % base_dir)
api_requirements = read_api_requirements('%s/../requirements' % base_dir)
ui_requirements = read_ui_requirements('%s/ui' % base_dir)

View File

@@ -75,7 +75,6 @@ def test_encrypted_subfields(get, post, user, organization):
url = reverse('api:notification_template_detail', kwargs={'pk': response.data['id']})
response = get(url, u)
assert response.data['notification_configuration']['account_token'] == "$encrypted$"
with mock.patch.object(notification_template_actual.notification_class, "send_messages", assert_send):
notification_template_actual.send("Test", {'body': "Test"})
@@ -176,46 +175,3 @@ def test_custom_environment_injection(post, user, organization):
fake_send.side_effect = _send_side_effect
template.send('subject', 'message')
def mock_post(*args, **kwargs):
    """Stand-in for a POST call that redirects everything except the good endpoint.

    Reads the target from the ``url`` keyword argument (a missing ``url``
    raises KeyError). Returns an object with ``status_code == 200`` for
    "http://goodendpoint"; any other URL yields ``status_code == 301`` plus a
    ``Location`` header pointing at the good endpoint.
    """

    class MockGoodResponse:
        def __init__(self):
            self.status_code = 200

    class MockRedirectResponse:
        def __init__(self):
            self.status_code = 301
            self.headers = {"Location": "http://goodendpoint"}

    hit_good_endpoint = kwargs['url'] == "http://goodendpoint"
    response_cls = MockGoodResponse if hit_good_endpoint else MockRedirectResponse
    return response_cls()
@pytest.mark.django_db
@mock.patch('requests.post', side_effect=mock_post)
def test_webhook_notification_pointed_to_a_redirect_launch_endpoint(post, admin, organization):
    """A webhook whose URL answers with a redirect is still expected to report one sent message.

    requests.post is replaced by mock_post for the duration of the test.
    """
    webhook_configuration = {
        'url': "http://some.fake.url",
        'disable_ssl_verification': True,
        'http_method': "POST",
        'headers': {
            "Content-Type": "application/json",
        },
        'username': admin.username,
        'password': admin.password,
    }
    template_messages = {
        "success": {"message": "", "body": "{}"},
    }
    n1 = NotificationTemplate.objects.create(
        name="test-webhook",
        description="test webhook",
        organization=organization,
        notification_type="webhook",
        notification_configuration=webhook_configuration,
        messages=template_messages,
    )

    success_body = n1.messages.get("success").get("body")
    assert n1.send("", success_body) == 1

View File

@@ -5,8 +5,8 @@ import tempfile
import shutil
from awx.main.tasks.jobs import RunJob
from awx.main.tasks.system import execution_node_health_check, _cleanup_images_and_files, handle_work_error
from awx.main.models import Instance, Job, InventoryUpdate, ProjectUpdate
from awx.main.tasks.system import execution_node_health_check, _cleanup_images_and_files
from awx.main.models import Instance, Job
@pytest.fixture
@@ -74,17 +74,3 @@ def test_does_not_run_reaped_job(mocker, mock_me):
job.refresh_from_db()
assert job.status == 'failed'
mock_run.assert_not_called()
@pytest.mark.django_db
def test_handle_work_error_nested(project, inventory_source):
    """handle_work_error on a failed project update annotates the linked tasks.

    After the call, the inventory update's explanation must name the project
    update, and the job's explanation must name the inventory update.
    """
    pu = ProjectUpdate.objects.create(status='failed', project=project, celery_task_id='1234')
    iu = InventoryUpdate.objects.create(status='pending', inventory_source=inventory_source, source='scm')
    job = Job.objects.create(status='pending')
    iu.dependent_jobs.add(pu)
    job.dependent_jobs.add(pu, iu)

    # Expected texts only depend on primary keys, so they can be built up front.
    expectations = (
        (iu, f'Previous Task Failed: {{"job_type": "project_update", "job_name": "", "job_id": "{pu.id}"}}'),
        (job, f'Previous Task Failed: {{"job_type": "inventory_update", "job_name": "", "job_id": "{iu.id}"}}'),
    )

    handle_work_error({'type': 'project_update', 'id': pu.id})

    for task, expected_explanation in expectations:
        task.refresh_from_db()
        assert task.job_explanation == expected_explanation

View File

@@ -5,8 +5,7 @@ from unittest import mock
from collections import namedtuple
from awx.api.views.root import ApiVersionRootView
from awx.api.views import JobTemplateLabelList, InventoryInventorySourcesUpdate, JobTemplateSurveySpec
from awx.api.views import ApiVersionRootView, JobTemplateLabelList, InventoryInventorySourcesUpdate, JobTemplateSurveySpec
from awx.main.views import handle_error
@@ -24,7 +23,7 @@ class TestApiRootView:
endpoints = [
'ping',
'config',
# 'settings',
#'settings',
'me',
'dashboard',
'organizations',

View File

@@ -1,7 +1,10 @@
import pytest
from unittest import mock
from unittest.mock import Mock
from decimal import Decimal
from awx.main.models import Instance
from awx.main.models import InstanceGroup, Instance
from awx.main.scheduler.task_manager_models import TaskManagerInstanceGroups
@pytest.mark.parametrize('capacity_adjustment', [0.0, 0.25, 0.5, 0.75, 1, 1.5, 3])
@@ -14,6 +17,83 @@ def test_capacity_adjustment_no_save(capacity_adjustment):
assert inst.capacity == (float(inst.capacity_adjustment) * abs(inst.mem_capacity - inst.cpu_capacity) + min(inst.mem_capacity, inst.cpu_capacity))
def T(impact):
    """Return a mock task exposing only ``task_impact`` (= impact) and ``capacity_type`` ('execution').

    spec_set restricts the mock to those two attributes, so reading or
    writing any other attribute raises AttributeError.
    """
    return mock.Mock(
        spec_set=['task_impact', 'capacity_type'],
        task_impact=impact,
        capacity_type='execution',
    )
def Is(param):
    """
    Build a list of mock execution-node instances from a compact description.

    param is one of:
        [remaining_capacity1, remaining_capacity2, remaining_capacity3, ...]
        [(jobs_running1, capacity1), (jobs_running2, capacity2), (jobs_running3, capacity3), ...]

    The form is decided by whether the first element is a tuple, so param
    must not be empty.
    """
    if isinstance(param[0], tuple):
        return [Mock(capacity=capacity, jobs_running=jobs_running, node_type='execution') for jobs_running, capacity in param]
    return [Mock(remaining_capacity=remaining, node_type='execution') for remaining in param]
class TestInstanceGroup:
    """Instance-selection checks for TaskManagerInstanceGroups, driven by mocked tasks and instances."""

    @pytest.mark.parametrize(
        'task,instances,instance_fit_index,reason',
        [
            (T(100), Is([100]), 0, "Only one, pick it"),
            (T(100), Is([100, 100]), 0, "Two equally good fits, pick the first"),
            (T(100), Is([50, 100]), 1, "First instance not as good as second instance"),
            (T(100), Is([50, 0, 20, 100, 100, 100, 30, 20]), 3, "Pick Instance [3] as it is the first that the task fits in."),
            (T(100), Is([50, 0, 20, 99, 11, 1, 5, 99]), None, "The task don't a fit, you must a quit!"),
        ],
    )
    def test_fit_task_to_most_remaining_capacity_instance(self, task, instances, instance_fit_index, reason):
        """The picked instance is instances[instance_fit_index], or None when no index is expected."""
        InstanceGroup(id=10)
        seed = {'controlplane': {'instances': instances}}
        tm_igs = TaskManagerInstanceGroups(instance_groups=seed)

        instance_picked = tm_igs.fit_task_to_most_remaining_capacity_instance(task, 'controlplane')

        if instance_fit_index is not None:
            assert instance_picked == instances[instance_fit_index], reason
        else:
            assert instance_picked is None, reason

    @pytest.mark.parametrize(
        'instances,instance_fit_index,reason',
        [
            (Is([(0, 100)]), 0, "One idle instance, pick it"),
            (Is([(1, 100)]), None, "One un-idle instance, pick nothing"),
            (Is([(0, 100), (0, 200), (1, 500), (0, 700)]), 3, "Pick the largest idle instance"),
            (Is([(0, 100), (0, 200), (1, 10000), (0, 700), (0, 699)]), 3, "Pick the largest idle instance"),
            (Is([(0, 0)]), None, "One idle but down instance, don't pick it"),
        ],
    )
    def test_find_largest_idle_instance(self, instances, instance_fit_index, reason):
        """Only instances with capacity > 0 are offered; the expected pick is instances[instance_fit_index] or None."""
        InstanceGroup(id=10)
        # Lazily drop zero-capacity instances before seeding the group.
        instances_online_only = filter(lambda i: i.capacity > 0, instances)
        tm_igs = TaskManagerInstanceGroups(instance_groups={'controlplane': {'instances': instances_online_only}})

        largest_idle = tm_igs.find_largest_idle_instance('controlplane')

        if instance_fit_index is not None:
            assert largest_idle == instances[instance_fit_index], reason
        else:
            assert largest_idle is None, reason
def test_cleanup_params_defaults():
    """An unsaved Instance passes exclude_strings through and adds the expected file_pattern and grace_period."""
    expected_kwargs = {'exclude_strings': ['awx_423_'], 'file_pattern': '/tmp/awx_*_*', 'grace_period': 60}
    inst = Instance(hostname='foobar')
    actual_kwargs = inst.get_cleanup_task_kwargs(exclude_strings=['awx_423_'])
    assert actual_kwargs == expected_kwargs

View File

@@ -36,14 +36,15 @@ def job(mocker, hosts, inventory):
def test_start_job_fact_cache(hosts, job, inventory, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = job.start_job_fact_cache(fact_cache, timeout=0)
modified_times = {}
job.start_job_fact_cache(fact_cache, modified_times, 0)
for host in hosts:
filepath = os.path.join(fact_cache, host.name)
assert os.path.exists(filepath)
with open(filepath, 'r') as f:
assert f.read() == json.dumps(host.ansible_facts)
assert os.path.getmtime(filepath) <= last_modified
assert filepath in modified_times
def test_fact_cache_with_invalid_path_traversal(job, inventory, tmpdir, mocker):
@@ -57,16 +58,18 @@ def test_fact_cache_with_invalid_path_traversal(job, inventory, tmpdir, mocker):
)
fact_cache = os.path.join(tmpdir, 'facts')
job.start_job_fact_cache(fact_cache, timeout=0)
job.start_job_fact_cache(fact_cache, {}, 0)
# a file called "foo" should _not_ be written outside the facts dir
assert os.listdir(os.path.join(fact_cache, '..')) == ['facts']
def test_finish_job_fact_cache_with_existing_data(job, hosts, inventory, mocker, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = job.start_job_fact_cache(fact_cache, timeout=0)
modified_times = {}
job.start_job_fact_cache(fact_cache, modified_times, 0)
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')
for h in hosts:
h.save = mocker.Mock()
ansible_facts_new = {"foo": "bar"}
filepath = os.path.join(fact_cache, hosts[1].name)
@@ -80,20 +83,23 @@ def test_finish_job_fact_cache_with_existing_data(job, hosts, inventory, mocker,
new_modification_time = time.time() + 3600
os.utime(filepath, (new_modification_time, new_modification_time))
job.finish_job_fact_cache(fact_cache, last_modified)
job.finish_job_fact_cache(fact_cache, modified_times)
for host in (hosts[0], hosts[2], hosts[3]):
host.save.assert_not_called()
assert host.ansible_facts == {"a": 1, "b": 2}
assert host.ansible_facts_modified is None
assert hosts[1].ansible_facts == ansible_facts_new
bulk_update.assert_called_once_with([hosts[1]], ['ansible_facts', 'ansible_facts_modified'])
hosts[1].save.assert_called_once_with(update_fields=['ansible_facts', 'ansible_facts_modified'])
def test_finish_job_fact_cache_with_bad_data(job, hosts, inventory, mocker, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = job.start_job_fact_cache(fact_cache, timeout=0)
modified_times = {}
job.start_job_fact_cache(fact_cache, modified_times, 0)
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')
for h in hosts:
h.save = mocker.Mock()
for h in hosts:
filepath = os.path.join(fact_cache, h.name)
@@ -103,22 +109,26 @@ def test_finish_job_fact_cache_with_bad_data(job, hosts, inventory, mocker, tmpd
new_modification_time = time.time() + 3600
os.utime(filepath, (new_modification_time, new_modification_time))
job.finish_job_fact_cache(fact_cache, last_modified)
job.finish_job_fact_cache(fact_cache, modified_times)
bulk_update.assert_not_called()
for h in hosts:
h.save.assert_not_called()
def test_finish_job_fact_cache_clear(job, hosts, inventory, mocker, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = job.start_job_fact_cache(fact_cache, timeout=0)
modified_times = {}
job.start_job_fact_cache(fact_cache, modified_times, 0)
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')
for h in hosts:
h.save = mocker.Mock()
os.remove(os.path.join(fact_cache, hosts[1].name))
job.finish_job_fact_cache(fact_cache, last_modified)
job.finish_job_fact_cache(fact_cache, modified_times)
for host in (hosts[0], hosts[2], hosts[3]):
host.save.assert_not_called()
assert host.ansible_facts == {"a": 1, "b": 2}
assert host.ansible_facts_modified is None
assert hosts[1].ansible_facts == {}
bulk_update.assert_called_once_with([hosts[1]], ['ansible_facts', 'ansible_facts_modified'])
hosts[1].save.assert_called_once_with()

View File

@@ -50,10 +50,7 @@ def test_cancel(unified_job):
# Some more thought may want to go into only emitting canceled if/when the job record
# status is changed to canceled. Unlike, currently, where it's emitted unconditionally.
unified_job.websocket_emit_status.assert_called_with("canceled")
assert [(args, kwargs) for args, kwargs in unified_job.save.call_args_list] == [
((), {'update_fields': ['cancel_flag', 'start_args']}),
((), {'update_fields': ['status']}),
]
unified_job.save.assert_called_with(update_fields=['cancel_flag', 'start_args', 'status'])
def test_cancel_job_explanation(unified_job):
@@ -63,10 +60,7 @@ def test_cancel_job_explanation(unified_job):
unified_job.cancel(job_explanation=job_explanation)
assert unified_job.job_explanation == job_explanation
assert [(args, kwargs) for args, kwargs in unified_job.save.call_args_list] == [
((), {'update_fields': ['cancel_flag', 'start_args', 'job_explanation']}),
((), {'update_fields': ['status']}),
]
unified_job.save.assert_called_with(update_fields=['cancel_flag', 'start_args', 'job_explanation', 'status'])
def test_organization_copy_to_jobs():

View File

@@ -27,12 +27,11 @@ def test_send_messages_as_POST():
]
)
requests_mock.post.assert_called_once_with(
url='http://example.com',
'http://example.com',
auth=None,
data=json.dumps({'text': 'test body'}, ensure_ascii=False).encode('utf-8'),
headers={'Content-Type': 'application/json', 'User-Agent': 'AWX 0.0.1.dev (open)'},
verify=True,
allow_redirects=False,
)
assert sent_messages == 1
@@ -58,12 +57,11 @@ def test_send_messages_as_PUT():
]
)
requests_mock.put.assert_called_once_with(
url='http://example.com',
'http://example.com',
auth=None,
data=json.dumps({'text': 'test body 2'}, ensure_ascii=False).encode('utf-8'),
headers={'Content-Type': 'application/json', 'User-Agent': 'AWX 0.0.1.dev (open)'},
verify=True,
allow_redirects=False,
)
assert sent_messages == 1
@@ -89,12 +87,11 @@ def test_send_messages_with_username():
]
)
requests_mock.post.assert_called_once_with(
url='http://example.com',
'http://example.com',
auth=('userstring', None),
data=json.dumps({'text': 'test body'}, ensure_ascii=False).encode('utf-8'),
headers={'Content-Type': 'application/json', 'User-Agent': 'AWX 0.0.1.dev (open)'},
verify=True,
allow_redirects=False,
)
assert sent_messages == 1
@@ -120,12 +117,11 @@ def test_send_messages_with_password():
]
)
requests_mock.post.assert_called_once_with(
url='http://example.com',
'http://example.com',
auth=(None, 'passwordstring'),
data=json.dumps({'text': 'test body'}, ensure_ascii=False).encode('utf-8'),
headers={'Content-Type': 'application/json', 'User-Agent': 'AWX 0.0.1.dev (open)'},
verify=True,
allow_redirects=False,
)
assert sent_messages == 1
@@ -151,12 +147,11 @@ def test_send_messages_with_username_and_password():
]
)
requests_mock.post.assert_called_once_with(
url='http://example.com',
'http://example.com',
auth=('userstring', 'passwordstring'),
data=json.dumps({'text': 'test body'}, ensure_ascii=False).encode('utf-8'),
headers={'Content-Type': 'application/json', 'User-Agent': 'AWX 0.0.1.dev (open)'},
verify=True,
allow_redirects=False,
)
assert sent_messages == 1
@@ -182,12 +177,11 @@ def test_send_messages_with_no_verify_ssl():
]
)
requests_mock.post.assert_called_once_with(
url='http://example.com',
'http://example.com',
auth=None,
data=json.dumps({'text': 'test body'}, ensure_ascii=False).encode('utf-8'),
headers={'Content-Type': 'application/json', 'User-Agent': 'AWX 0.0.1.dev (open)'},
verify=False,
allow_redirects=False,
)
assert sent_messages == 1
@@ -213,7 +207,7 @@ def test_send_messages_with_additional_headers():
]
)
requests_mock.post.assert_called_once_with(
url='http://example.com',
'http://example.com',
auth=None,
data=json.dumps({'text': 'test body'}, ensure_ascii=False).encode('utf-8'),
headers={
@@ -223,6 +217,5 @@ def test_send_messages_with_additional_headers():
'X-Test-Header2': 'test-content-2',
},
verify=True,
allow_redirects=False,
)
assert sent_messages == 1

View File

@@ -1,6 +1,6 @@
import pytest
from awx.main.scheduler.task_manager_models import TaskManagerModels
from awx.main.scheduler.task_manager_models import TaskManagerInstanceGroups, TaskManagerInstances
class FakeMeta(object):
@@ -16,64 +16,38 @@ class FakeObject(object):
class Job(FakeObject):
def __init__(self, **kwargs):
self.task_impact = kwargs.get('task_impact', 43)
self.is_container_group_task = kwargs.get('is_container_group_task', False)
self.controller_node = kwargs.get('controller_node', '')
self.execution_node = kwargs.get('execution_node', '')
self.instance_group = kwargs.get('instance_group', None)
self.instance_group_id = self.instance_group.id if self.instance_group else None
self.capacity_type = kwargs.get('capacity_type', 'execution')
task_impact = 43
is_container_group_task = False
controller_node = ''
execution_node = ''
def log_format(self):
return 'job 382 (fake)'
class Instances(FakeObject):
    """Manager-like wrapper exposing add()/all() over ``self.obj.instance_list``."""

    def add(self, *args):
        """Append every given instance, in order, to the wrapped object's instance_list."""
        self.obj.instance_list.extend(args)

    def all(self):
        """Return the wrapped object's instance_list itself (not a copy)."""
        return self.obj.instance_list
class InstanceGroup(FakeObject):
    """Fake instance group: holds a plain list of instances and reports no container/limit settings."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.instance_list = []
        # pk and id always agree; default to 1 when no id is supplied.
        group_id = kwargs.get('id', 1)
        self.pk = group_id
        self.id = group_id

    @property
    def instances(self):
        """Return a fresh Instances wrapper bound to this group."""
        return Instances(obj=self)

    @property
    def is_container_group(self):
        """Always False."""
        return False

    @property
    def max_concurrent_jobs(self):
        """Always 0."""
        return 0

    @property
    def max_forks(self):
        """Always 0."""
        return 0
class Instance(FakeObject):
    """Fake instance carrying only node_type, capacity, hostname and zeroed usage counters."""

    def __init__(self, **kwargs):
        # Keyword-overridable fields with their fallbacks.
        overridable = {'node_type': 'hybrid', 'capacity': 0, 'hostname': 'fakehostname'}
        for field_name, fallback in overridable.items():
            setattr(self, field_name, kwargs.get(field_name, fallback))
        # Usage counters always start at zero.
        self.consumed_capacity = 0
        self.jobs_running = 0
@pytest.fixture
def sample_cluster():
def stand_up_cluster():
class Instances(FakeObject):
def add(self, *args):
for instance in args:
self.obj.instance_list.append(instance)
def all(self):
return self.obj.instance_list
class InstanceGroup(FakeObject):
def __init__(self, **kwargs):
super(InstanceGroup, self).__init__(**kwargs)
self.instance_list = []
@property
def instances(self):
mgr = Instances(obj=self)
return mgr
class Instance(FakeObject):
pass
ig_small = InstanceGroup(name='ig_small')
ig_large = InstanceGroup(name='ig_large')
@@ -92,12 +66,14 @@ def sample_cluster():
@pytest.fixture
def create_ig_manager():
def _rf(ig_list, tasks):
tm_models = TaskManagerModels.init_with_consumed_capacity(
tasks=tasks,
instances=set(inst for ig in ig_list for inst in ig.instance_list),
instance_groups=ig_list,
)
return tm_models.instance_groups
instances = TaskManagerInstances(tasks, instances=set(inst for ig in ig_list for inst in ig.instance_list))
seed_igs = {}
for ig in ig_list:
seed_igs[ig.name] = {'instances': [instances[inst.hostname] for inst in ig.instance_list]}
instance_groups = TaskManagerInstanceGroups(instance_groups=seed_igs)
return instance_groups
return _rf
@@ -150,75 +126,3 @@ def test_RBAC_reduced_filter(sample_cluster, create_ig_manager):
# Cross-links between groups not visible to current user,
# so a naive accounting of capacities is returned instead
assert instance_groups_mgr.get_consumed_capacity('default') == 43
def Is(param):
    """
    Build fake execution-node Instance objects named fakehost-0, fakehost-1, ...

    param is one of:
        [capacity1, capacity2, capacity3, ...]
        [(jobs_running1, capacity1), (jobs_running2, capacity2), (jobs_running3, capacity3), ...]

    The form is decided by whether the first element is a tuple, so param
    must not be empty.
    """

    def make_instance(index, capacity):
        return Instance(capacity=capacity, node_type='execution', hostname=f'fakehost-{index}')

    if not isinstance(param[0], tuple):
        return [make_instance(index, capacity) for index, capacity in enumerate(param)]

    instances = []
    for index, (jobs_running, capacity) in enumerate(param):
        inst = make_instance(index, capacity)
        inst.jobs_running = jobs_running
        instances.append(inst)
    return instances
class TestSelectBestInstanceForTask:
    """Instance-selection checks against TaskManagerModels built from fake groups, instances and jobs."""

    @staticmethod
    def _models_for(instances):
        """Put all instances in a 'controlplane' group, add one fake job per running job, and build the models."""
        ig = InstanceGroup(id=10, name='controlplane')
        tasks = []
        for instance in instances:
            ig.instances.add(instance)
            tasks.extend(
                Job(execution_node=instance.hostname, controller_node=instance.hostname, instance_group=ig) for _ in range(instance.jobs_running)
            )
        return TaskManagerModels.init_with_consumed_capacity(tasks=tasks, instances=instances, instance_groups=[ig])

    @pytest.mark.parametrize(
        'task,instances,instance_fit_index,reason',
        [
            (Job(task_impact=100), Is([100]), 0, "Only one, pick it"),
            (Job(task_impact=100), Is([100, 100]), 0, "Two equally good fits, pick the first"),
            (Job(task_impact=100), Is([50, 100]), 1, "First instance not as good as second instance"),
            (Job(task_impact=100), Is([50, 0, 20, 100, 100, 100, 30, 20]), 3, "Pick Instance [3] as it is the first that the task fits in."),
            (Job(task_impact=100), Is([50, 0, 20, 99, 11, 1, 5, 99]), None, "The task don't a fit, you must a quit!"),
        ],
    )
    def test_fit_task_to_most_remaining_capacity_instance(self, task, instances, instance_fit_index, reason):
        """The picked instance's hostname matches instances[instance_fit_index], or nothing is picked."""
        tm_models = self._models_for(instances)
        instance_picked = tm_models.instance_groups.fit_task_to_most_remaining_capacity_instance(task, 'controlplane')

        if instance_fit_index is not None:
            assert instance_picked.hostname == instances[instance_fit_index].hostname, reason
        else:
            assert instance_picked is None, reason

    @pytest.mark.parametrize(
        'instances,instance_fit_index,reason',
        [
            (Is([(0, 100)]), 0, "One idle instance, pick it"),
            (Is([(1, 100)]), None, "One un-idle instance, pick nothing"),
            (Is([(0, 100), (0, 200), (1, 500), (0, 700)]), 3, "Pick the largest idle instance"),
            (Is([(0, 100), (0, 200), (1, 10000), (0, 700), (0, 699)]), 3, "Pick the largest idle instance"),
            (Is([(0, 0)]), None, "One idle but down instance, don't pick it"),
        ],
    )
    def test_find_largest_idle_instance(self, instances, instance_fit_index, reason):
        """The largest idle instance's hostname matches instances[instance_fit_index], or none is found."""
        tm_models = self._models_for(instances)
        largest_idle = tm_models.instance_groups.find_largest_idle_instance('controlplane')

        if instance_fit_index is not None:
            assert largest_idle.hostname == instances[instance_fit_index].hostname, reason
        else:
            assert largest_idle is None, reason

View File

@@ -3,7 +3,6 @@
# Copyright (c) 2017 Ansible, Inc.
# All Rights Reserved.
import os
import re
import pytest
from uuid import uuid4
import json
@@ -13,13 +12,9 @@ from unittest import mock
from rest_framework.exceptions import ParseError
from awx.main.utils import common
from awx.api.validators import HostnameRegexValidator
from awx.main.models import Job, AdHocCommand, InventoryUpdate, ProjectUpdate, SystemJob, WorkflowJob, Inventory, JobTemplate, UnifiedJobTemplate, UnifiedJob
from django.core.exceptions import ValidationError
from django.utils.regex_helper import _lazy_re_compile
@pytest.mark.parametrize(
'input_, output',
@@ -199,136 +194,3 @@ def test_extract_ansible_vars():
redacted, var_list = common.extract_ansible_vars(json.dumps(my_dict))
assert var_list == set(['ansible_connetion_setting'])
assert redacted == {"foobar": "baz"}
@pytest.mark.parametrize(
    'scm_type, url, username, password, check_special_cases, scp_format, expected',
    [
        # General/random cases
        ('git', '', True, True, True, False, ''),
        ('git', 'git://example.com/foo.git', True, True, True, False, 'git://example.com/foo.git'),
        ('git', 'http://example.com/foo.git', True, True, True, False, 'http://example.com/foo.git'),
        ('git', 'example.com:bar.git', True, True, True, False, 'git+ssh://example.com/bar.git'),
        ('git', 'user@example.com:bar.git', True, True, True, False, 'git+ssh://user@example.com/bar.git'),
        ('git', '127.0.0.1:bar.git', True, True, True, False, 'git+ssh://127.0.0.1/bar.git'),
        ('git', 'git+ssh://127.0.0.1/bar.git', True, True, True, True, '127.0.0.1:bar.git'),
        ('git', 'ssh://127.0.0.1:22/bar.git', True, True, True, False, 'ssh://127.0.0.1:22/bar.git'),
        ('git', 'ssh://root@127.0.0.1:22/bar.git', True, True, True, False, 'ssh://root@127.0.0.1:22/bar.git'),
        ('git', 'some/path', True, True, True, False, 'file:///some/path'),
        ('git', '/some/path', True, True, True, False, 'file:///some/path'),
        # Invalid URLs - ensure we error properly
        ('cvs', 'anything', True, True, True, False, ValueError('Unsupported SCM type "cvs"')),
        ('svn', 'anything-without-colon-slash-slash', True, True, True, False, ValueError('Invalid svn URL')),
        ('git', 'http://example.com:123invalidport/foo.git', True, True, True, False, ValueError('Invalid git URL')),
        ('git', 'git+ssh://127.0.0.1/bar.git', True, True, True, False, ValueError('Unsupported git URL')),
        ('git', 'git@example.com:3000:/git/repo.git', True, True, True, False, ValueError('Invalid git URL')),
        ('insights', 'git://example.com/foo.git', True, True, True, False, ValueError('Unsupported insights URL')),
        ('svn', 'file://example/path', True, True, True, False, ValueError('Unsupported host "example" for file:// URL')),
        ('svn', 'svn:///example', True, True, True, False, ValueError('Host is required for svn URL')),
        # Username/password cases
        ('git', 'https://example@example.com/bar.git', False, True, True, False, 'https://example.com/bar.git'),
        ('git', 'https://example@example.com/bar.git', 'user', True, True, False, 'https://user@example.com/bar.git'),
        ('git', 'https://example@example.com/bar.git', 'user:pw', True, True, False, 'https://user%3Apw@example.com/bar.git'),
        ('git', 'https://example@example.com/bar.git', False, 'pw', True, False, 'https://example.com/bar.git'),
        ('git', 'https://some:example@example.com/bar.git', True, False, True, False, 'https://some@example.com/bar.git'),
        ('git', 'https://some:example@example.com/bar.git', False, False, True, False, 'https://example.com/bar.git'),
        ('git', 'https://example.com/bar.git', 'user', 'pw', True, False, 'https://user:pw@example.com/bar.git'),
        ('git', 'https://example@example.com/bar.git', False, 'something', True, False, 'https://example.com/bar.git'),
        # Special github/bitbucket cases
        ('git', 'notgit@github.com:ansible/awx.git', True, True, True, False, ValueError('Username must be "git" for SSH access to github.com.')),
        (
            'git',
            'notgit@bitbucket.org:does-not-exist/example.git',
            True,
            True,
            True,
            False,
            ValueError('Username must be "git" for SSH access to bitbucket.org.'),
        ),
        (
            'git',
            'notgit@altssh.bitbucket.org:does-not-exist/example.git',
            True,
            True,
            True,
            False,
            ValueError('Username must be "git" for SSH access to altssh.bitbucket.org.'),
        ),
        ('git', 'git:password@github.com:ansible/awx.git', True, True, True, False, 'git+ssh://git@github.com/ansible/awx.git'),
        # Disabling the special handling should not raise an error
        ('git', 'notgit@github.com:ansible/awx.git', True, True, False, False, 'git+ssh://notgit@github.com/ansible/awx.git'),
        ('git', 'notgit@bitbucket.org:does-not-exist/example.git', True, True, False, False, 'git+ssh://notgit@bitbucket.org/does-not-exist/example.git'),
        (
            'git',
            'notgit@altssh.bitbucket.org:does-not-exist/example.git',
            True,
            True,
            False,
            False,
            'git+ssh://notgit@altssh.bitbucket.org/does-not-exist/example.git',
        ),
        # awx#12992 - IPv6
        ('git', 'http://[fd00:1234:2345:6789::11]:3000/foo.git', True, True, True, False, 'http://[fd00:1234:2345:6789::11]:3000/foo.git'),
        ('git', 'http://foo:bar@[fd00:1234:2345:6789::11]:3000/foo.git', True, True, True, False, 'http://foo:bar@[fd00:1234:2345:6789::11]:3000/foo.git'),
        ('git', 'example@[fd00:1234:2345:6789::11]:example/foo.git', True, True, True, False, 'git+ssh://example@[fd00:1234:2345:6789::11]/example/foo.git'),
    ],
)
def test_update_scm_url(scm_type, url, username, password, check_special_cases, scp_format, expected):
    """Exercise ``common.update_scm_url`` against a table of inputs.

    ``expected`` is either the URL string the call must return, or an
    exception instance; in the latter case the call must raise that exception
    type with an identical string representation.
    """
    call_args = (scm_type, url, username, password, check_special_cases, scp_format)

    # Success rows: compare the returned URL directly.
    if not isinstance(expected, Exception):
        assert common.update_scm_url(*call_args) == expected
        return

    # Error rows: both the exception type and its message must match.
    with pytest.raises(type(expected)) as excinfo:
        common.update_scm_url(*call_args)
    assert str(excinfo.value) == str(expected)
class TestHostnameRegexValidator:
    """Unit tests for ``HostnameRegexValidator`` construction and call behaviour."""

    @pytest.fixture
    def regex_expr(self):
        """Hostname pattern passed to (and expected from) the validator."""
        return '^[a-z0-9][-a-z0-9]*$|^([a-z0-9][-a-z0-9]{0,62}[.])*[a-z0-9][-a-z0-9]{1,62}$'

    @pytest.fixture
    def re_flags(self):
        """Regex flags passed to (and expected from) the validator."""
        return re.IGNORECASE

    @pytest.fixture
    def custom_err_message(self):
        """Custom error message used by the explicit-constructor test."""
        return "foobar"

    def test_hostame_regex_validator_constructor_with_args(self, regex_expr, re_flags, custom_err_message):
        """Explicit regex, flags and message are stored on the validator."""
        h = HostnameRegexValidator(regex=regex_expr, flags=re_flags, message=custom_err_message)
        assert h.regex == _lazy_re_compile(regex_expr, re_flags)
        assert h.message == 'foobar'
        assert h.code == 'invalid'
        assert h.inverse_match is False
        assert h.flags == re_flags

    def test_hostame_regex_validator_default_constructor(self, regex_expr, re_flags):
        """A no-argument validator uses the fixture pattern and default message."""
        h = HostnameRegexValidator()
        assert h.regex == _lazy_re_compile(regex_expr, re_flags)
        assert h.message == 'Enter a valid value.'
        assert h.code == 'invalid'
        assert h.inverse_match is False
        assert h.flags == re_flags

    def test_good_call(self, regex_expr, re_flags):
        """A valid value is accepted: the call returns None without raising."""
        h = HostnameRegexValidator(regex=regex_expr, flags=re_flags)
        # Previously `assert (h(...), None)`, which asserts a non-empty tuple
        # and is therefore always true regardless of the validator's result.
        assert h("192.168.56.101") is None

    def test_bad_call(self, regex_expr, re_flags):
        """An invalid value must raise ValidationError carrying a message."""
        h = HostnameRegexValidator(regex=regex_expr, flags=re_flags)
        # pytest.raises fails the test when nothing is raised; the former
        # try/except silently passed in that case.
        with pytest.raises(ValidationError) as excinfo:
            h("@#$%)$#(TUFAS_DG")
        assert excinfo.value.message is not None

    def test_good_call_with_inverse(self, regex_expr, re_flags, inverse_match=True):
        """With inverse matching, an otherwise valid value must be rejected."""
        h = HostnameRegexValidator(regex=regex_expr, flags=re_flags, inverse_match=inverse_match)
        with pytest.raises(ValidationError) as excinfo:
            h("1.2.3.4")
        assert excinfo.value.message is not None

    def test_bad_call_with_inverse(self, regex_expr, re_flags, inverse_match=True):
        """With inverse matching, an otherwise invalid value is accepted."""
        h = HostnameRegexValidator(regex=regex_expr, flags=re_flags, inverse_match=inverse_match)
        # Same always-true tuple assertion as in test_good_call, fixed here.
        assert h("@#$%)$#(TUFAS_DG") is None

View File

@@ -11,12 +11,11 @@ import os
import subprocess
import re
import stat
import sys
import urllib.parse
import threading
import contextlib
import tempfile
import functools
from functools import reduce, wraps
# Django
from django.core.exceptions import ObjectDoesNotExist, FieldDoesNotExist
@@ -74,7 +73,6 @@ __all__ = [
'NullablePromptPseudoField',
'model_instance_diff',
'parse_yaml_or_json',
'is_testing',
'RequireDebugTrueOrTest',
'has_model_field_prefetched',
'set_environ',
@@ -90,7 +88,6 @@ __all__ = [
'deepmerge',
'get_event_partition_epoch',
'cleanup_new_process',
'log_excess_runtime',
]
@@ -147,19 +144,6 @@ def underscore_to_camelcase(s):
return ''.join(x.capitalize() or '_' for x in s.split('_'))
@functools.cache
def is_testing(argv=None):
    '''Return True if running django or py.test unit tests.

    The PYTEST_CURRENT_TEST environment variable is checked first; otherwise
    the command line (``argv``, defaulting to ``sys.argv``) is inspected for a
    py.test launcher or a ``test`` sub-command.

    The result is memoized per ``argv`` value by ``functools.cache``, so an
    explicit ``argv`` must be hashable (e.g. a tuple), and later changes to the
    environment are not seen for an ``argv`` that was already evaluated.
    '''
    if 'PYTEST_CURRENT_TEST' in os.environ:
        return True

    args = sys.argv if argv is None else argv
    # Launched through py.test (either spelling of the entry point).
    launched_by_pytest = len(args) >= 1 and ('py.test' in args[0] or 'py/test.py' in args[0])
    # Launched as "<program> test ...", e.g. a management-command test run.
    test_subcommand = len(args) >= 2 and args[1] == 'test'
    return launched_by_pytest or test_subcommand
class RequireDebugTrueOrTest(logging.Filter):
"""
Logging filter to output when in DEBUG mode or running tests.
@@ -168,7 +152,7 @@ class RequireDebugTrueOrTest(logging.Filter):
def filter(self, record):
from django.conf import settings
return settings.DEBUG or is_testing()
return settings.DEBUG or settings.IS_TESTING()
class IllegalArgumentError(ValueError):
@@ -190,7 +174,7 @@ def memoize(ttl=60, cache_key=None, track_function=False, cache=None):
cache = cache or get_memoize_cache()
def memoize_decorator(f):
@functools.wraps(f)
@wraps(f)
def _memoizer(*args, **kwargs):
if track_function:
cache_dict_key = slugify('%r %r' % (args, kwargs))
@@ -280,15 +264,9 @@ def update_scm_url(scm_type, url, username=True, password=True, check_special_ca
userpass, hostpath = url.split('@', 1)
else:
userpass, hostpath = '', url
# Handle IPv6 here. In this case, we might have hostpath of:
# [fd00:1234:2345:6789::11]:example/foo.git
if hostpath.startswith('[') and ']:' in hostpath:
host, path = hostpath.split(']:', 1)
host = host + ']'
elif hostpath.count(':') > 1:
if hostpath.count(':') > 1:
raise ValueError(_('Invalid %s URL') % scm_type)
else:
host, path = hostpath.split(':', 1)
host, path = hostpath.split(':', 1)
# if not path.startswith('/') and not path.startswith('~/'):
# path = '~/%s' % path
# if path.startswith('/'):
@@ -347,11 +325,7 @@ def update_scm_url(scm_type, url, username=True, password=True, check_special_ca
netloc = u':'.join([urllib.parse.quote(x, safe='') for x in (netloc_username, netloc_password) if x])
else:
netloc = u''
# urllib.parse strips brackets from IPv6 addresses, so we need to add them back in
hostname = parts.hostname
if hostname and ':' in hostname and '[' in url and ']' in url:
hostname = f'[{hostname}]'
netloc = u'@'.join(filter(None, [netloc, hostname]))
netloc = u'@'.join(filter(None, [netloc, parts.hostname]))
if parts.port:
netloc = u':'.join([netloc, str(parts.port)])
new_url = urllib.parse.urlunsplit([parts.scheme, netloc, parts.path, parts.query, parts.fragment])
@@ -1008,7 +982,7 @@ def getattrd(obj, name, default=NoDefaultProvided):
"""
try:
return functools.reduce(getattr, name.split("."), obj)
return reduce(getattr, name.split("."), obj)
except AttributeError:
if default != NoDefaultProvided:
return default
@@ -1204,7 +1178,7 @@ def cleanup_new_process(func):
Cleanup django connection, cache connection, before executing new thread or processes entry point, func.
"""
@functools.wraps(func)
@wraps(func)
def wrapper_cleanup_new_process(*args, **kwargs):
from awx.conf.settings import SettingsWrapper # noqa
@@ -1216,30 +1190,15 @@ def cleanup_new_process(func):
return wrapper_cleanup_new_process
def log_excess_runtime(func_logger, cutoff=5.0, debug_cutoff=5.0, msg=None, add_log_data=False):
def log_excess_runtime(func_logger, cutoff=5.0):
def log_excess_runtime_decorator(func):
@functools.wraps(func)
@wraps(func)
def _new_func(*args, **kwargs):
start_time = time.time()
log_data = {'name': repr(func.__name__)}
if add_log_data:
return_value = func(*args, log_data=log_data, **kwargs)
else:
return_value = func(*args, **kwargs)
log_data['delta'] = time.time() - start_time
if isinstance(return_value, dict):
log_data.update(return_value)
if msg is None:
record_msg = 'Running {name} took {delta:.2f}s'
else:
record_msg = msg
if log_data['delta'] > cutoff:
func_logger.info(record_msg.format(**log_data))
elif log_data['delta'] > debug_cutoff:
func_logger.debug(record_msg.format(**log_data))
return_value = func(*args, **kwargs)
delta = time.time() - start_time
if delta > cutoff:
logger.info(f'Running {func.__name__!r} took {delta:.2f}s')
return return_value
return _new_func

View File

@@ -103,10 +103,6 @@ ColorHandler = logging.StreamHandler
if settings.COLOR_LOGS is True:
try:
from logutils.colorize import ColorizingStreamHandler
import colorama
colorama.deinit()
colorama.init(wrap=False, convert=False, strip=False)
class ColorHandler(ColorizingStreamHandler):
def colorize(self, line, record):
@@ -114,7 +110,7 @@ if settings.COLOR_LOGS is True:
# logs rendered with cyan text
previous_level_map = self.level_map.copy()
if record.name == "awx.analytics.job_lifecycle":
self.level_map[logging.INFO] = (None, 'cyan', True)
self.level_map[logging.DEBUG] = (None, 'cyan', True)
msg = super(ColorHandler, self).colorize(line, record)
self.level_map = previous_level_map
return msg

Some files were not shown because too many files have changed in this diff Show More