Compare commits

..

7 Commits

Author SHA1 Message Date
Jeff Bradberry
2ae9156a4a Merge pull request #12587 from ansible/mesh-scaling-backend
Allow for adding external execution nodes via API
2022-08-03 11:09:37 -04:00
Jeff Bradberry
4890c15eeb Update task management to only do things with ready instances 2022-08-02 15:58:43 -04:00
Jeff Bradberry
bc6b8fc4ae Check state when processing receptorctl advertisements
Nodes that show up and were in one of the unready states need to be
transitioned to ready, even if the logic in Instance.is_lost was not
met.
2022-08-02 15:58:20 -04:00
Jeff Bradberry
03c70077f9 Make sure that the health checks handle the state transitions properly
- nodes with states Provisioning, Provisioning Fail, Deprovisioning,
  and Deprovisioning Fail should bypass health checks and should never
  transition due to the existing machinery
- nodes with states Unavailable and Installed can transition to Ready
  if they check out as healthy
- nodes in the Ready state should transition to Unavailable if they
  fail a check
2022-08-02 13:55:35 -04:00
Jeff Bradberry
dab8c3ef55 Update node and link registration to put them in the right state
'Installed' for the nodes, 'Established' for the links.
2022-08-02 13:55:35 -04:00
Jeff Bradberry
d2a6be7ca9 Add the state fields and the peer relationships to the serializers 2022-08-02 13:55:35 -04:00
Jeff Bradberry
170795ab76 Add state fields to Instance and InstanceLink
Also, listener_port to Instance.
2022-08-02 13:55:03 -04:00
427 changed files with 5857 additions and 18707 deletions

View File

@@ -1,2 +1,3 @@
awx/ui/node_modules
Dockerfile
.git

View File

@@ -25,7 +25,7 @@ Instead use the bug or feature request.
<!--- Pick one below and delete the rest: -->
- Breaking Change
- New or Enhanced Feature
- Bug, Docs Fix or other nominal change
- Bug or Docs Fix
##### COMPONENT NAME

View File

@@ -20,19 +20,6 @@ body:
- label: I understand that AWX is open source software provided for free and that I might not receive a timely response.
required: true
- type: dropdown
id: feature-type
attributes:
label: Feature type
description: >-
What kind of feature is this?
multiple: false
options:
- "New Feature"
- "Enhancement to Existing Feature"
validations:
required: true
- type: textarea
id: summary
attributes:
@@ -53,36 +40,3 @@ body:
- label: CLI
- label: Other
- type: textarea
id: steps-to-reproduce
attributes:
label: Steps to reproduce
description: >-
Describe the necessary steps to understand the scenario of the requested enhancement.
Include all the steps that will help the developer and QE team understand what you are requesting.
validations:
required: true
- type: textarea
id: current-results
attributes:
label: Current results
description: What is currently happening on the scenario?
validations:
required: true
- type: textarea
id: sugested-results
attributes:
label: Sugested feature result
description: What is the result this new feature will bring?
validations:
required: true
- type: textarea
id: additional-information
attributes:
label: Additional information
description: Please provide any other information you think is relevant that could help us understand your feature request.
validations:
required: false

View File

@@ -11,7 +11,7 @@ the change does.
<!--- Pick one below and delete the rest: -->
- Breaking Change
- New or Enhanced Feature
- Bug, Docs Fix or other nominal change
- Bug or Docs Fix
##### COMPONENT NAME
<!--- Name of the module/plugin/module/task -->

View File

@@ -93,9 +93,6 @@ The Ansible Community is looking at building an EE that corresponds to all of th
- AWX: https://github.com/ansible/awx/blob/devel/CONTRIBUTING.md
- AWX-Operator: https://github.com/ansible/awx-operator/blob/devel/CONTRIBUTING.md
### Oracle AWX
We'd be happy to help if you can reproduce this with AWX, since we do not have Oracle's Linux Automation Manager. If you need help with this specific version of Oracle's Linux Automation Manager, you will need to contact Oracle for support.
### AWX Release
Subject: Announcing AWX Xa.Ya.za and AWX-Operator Xb.Yb.zb

View File

@@ -19,34 +19,3 @@ jobs:
not-before: 2021-12-07T07:00:00Z
configuration-path: .github/issue_labeler.yml
enable-versioned-regex: 0
community:
runs-on: ubuntu-latest
name: Label Issue - Community
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v4
- name: Install python requests
run: pip install requests
- name: Check if user is a member of Ansible org
uses: jannekem/run-python-script-action@v1
id: check_user
with:
script: |
import requests
headers = {'Accept': 'application/vnd.github+json', 'Authorization': 'token ${{ secrets.GITHUB_TOKEN }}'}
response = requests.get('${{ fromJson(toJson(github.event.issue.user.url)) }}/orgs?per_page=100', headers=headers)
is_member = False
for org in response.json():
if org['login'] == 'ansible':
is_member = True
if is_member:
print("User is member")
else:
print("User is community")
- name: Add community label if not a member
if: contains(steps.check_user.outputs.stdout, 'community')
uses: andymckay/labeler@e6c4322d0397f3240f0e7e30a33b5c5df2d39e90
with:
add-labels: "community"
repo-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -18,34 +18,3 @@ jobs:
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
configuration-path: .github/pr_labeler.yml
community:
runs-on: ubuntu-latest
name: Label PR - Community
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v4
- name: Install python requests
run: pip install requests
- name: Check if user is a member of Ansible org
uses: jannekem/run-python-script-action@v1
id: check_user
with:
script: |
import requests
headers = {'Accept': 'application/vnd.github+json', 'Authorization': 'token ${{ secrets.GITHUB_TOKEN }}'}
response = requests.get('${{ fromJson(toJson(github.event.pull_request.user.url)) }}/orgs?per_page=100', headers=headers)
is_member = False
for org in response.json():
if org['login'] == 'ansible':
is_member = True
if is_member:
print("User is member")
else:
print("User is community")
- name: Add community label if not a member
if: contains(steps.check_user.outputs.stdout, 'community')
uses: andymckay/labeler@e6c4322d0397f3240f0e7e30a33b5c5df2d39e90
with:
add-labels: "community"
repo-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -1,45 +0,0 @@
---
name: PR Check
env:
BRANCH: ${{ github.base_ref || 'devel' }}
on:
pull_request:
types: [opened, edited, reopened, synchronize]
jobs:
pr-check:
name: Scan PR description for semantic versioning keywords
runs-on: ubuntu-latest
permissions:
packages: write
contents: read
steps:
- name: Write PR body to a file
run: |
cat >> pr.body << __SOME_RANDOM_PR_EOF__
${{ github.event.pull_request.body }}
__SOME_RANDOM_PR_EOF__
- name: Display the received body for troubleshooting
run: cat pr.body
# We want to write these out individually just in case the options were joined on a single line
- name: Check for each of the lines
run: |
grep "Bug, Docs Fix or other nominal change" pr.body > Z
grep "New or Enhanced Feature" pr.body > Y
grep "Breaking Change" pr.body > X
exit 0
# We exit 0 and set the shell to prevent the returns from the greps from failing this step
# See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
shell: bash {0}
- name: Check for exactly one item
run: |
if [ $(cat X Y Z | wc -l) != 1 ] ; then
echo "The PR body must contain exactly one of [ 'Bug, Docs Fix or other nominal change', 'New or Enhanced Feature', 'Breaking Change' ]"
echo "We counted $(cat X Y Z | wc -l)"
echo "See the default PR body for examples"
exit 255;
else
exit 0;
fi

View File

@@ -1,29 +0,0 @@
---
name: Dependency Pr Update
on:
pull_request:
types: [labeled, opened, reopened]
jobs:
pr-check:
name: Update Dependabot Prs
if: contains(github.event.pull_request.labels.*.name, 'dependencies') && contains(github.event.pull_request.labels.*.name, 'component:ui')
runs-on: ubuntu-latest
steps:
- name: Checkout branch
uses: actions/checkout@v3
- name: Update PR Body
env:
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
OWNER: ${{ github.repository_owner }}
REPO: ${{ github.event.repository.name }}
PR: ${{github.event.pull_request.number}}
PR_BODY: ${{github.event.pull_request.body}}
run: |
gh pr checkout ${{ env.PR }}
echo "${{ env.PR_BODY }}" > my_pr_body.txt
echo "" >> my_pr_body.txt
echo "Bug, Docs Fix or other nominal change" >> my_pr_body.txt
gh pr edit ${{env.PR}} --body-file my_pr_body.txt

3
.gitignore vendored
View File

@@ -153,6 +153,9 @@ use_dev_supervisor.txt
/sanity/
/awx_collection_build/
# Setup for metrics gathering
tools/prometheus/prometheus.yml
.idea/*
*.unison.tmp
*.#

View File

@@ -8,8 +8,6 @@ ignore: |
awx/ui/test/e2e/tests/smoke-vars.yml
awx/ui/node_modules
tools/docker-compose/_sources
# django template files
awx/api/templates/instance_install_bundle/**
extends: default

View File

@@ -3,7 +3,7 @@ recursive-include awx *.po
recursive-include awx *.mo
recursive-include awx/static *
recursive-include awx/templates *.html
recursive-include awx/api/templates *.md *.html *.yml
recursive-include awx/api/templates *.md *.html
recursive-include awx/ui/build *.html
recursive-include awx/ui/build *
recursive-include awx/playbooks *.yml

View File

@@ -72,7 +72,7 @@ clean-languages:
rm -f $(I18N_FLAG_FILE)
find ./awx/locale/ -type f -regex ".*\.mo$" -delete
## Remove temporary build files, compiled Python files.
# Remove temporary build files, compiled Python files.
clean: clean-ui clean-api clean-awxkit clean-dist
rm -rf awx/public
rm -rf awx/lib/site-packages
@@ -94,7 +94,7 @@ clean-api:
clean-awxkit:
rm -rf awxkit/*.egg-info awxkit/.tox awxkit/build/*
## convenience target to assert environment variables are defined
# convenience target to assert environment variables are defined
guard-%:
@if [ "$${$*}" = "" ]; then \
echo "The required environment variable '$*' is not set"; \
@@ -117,7 +117,7 @@ virtualenv_awx:
fi; \
fi
## Install third-party requirements needed for AWX's environment.
# Install third-party requirements needed for AWX's environment.
# this does not use system site packages intentionally
requirements_awx: virtualenv_awx
if [[ "$(PIP_OPTIONS)" == *"--no-index"* ]]; then \
@@ -136,7 +136,7 @@ requirements_dev: requirements_awx requirements_awx_dev
requirements_test: requirements
## "Install" awx package in development mode.
# "Install" awx package in development mode.
develop:
@if [ "$(VIRTUAL_ENV)" ]; then \
pip uninstall -y awx; \
@@ -153,21 +153,21 @@ version_file:
fi; \
$(PYTHON) -c "import awx; print(awx.__version__)" > /var/lib/awx/.awx_version; \
## Refresh development environment after pulling new code.
# Refresh development environment after pulling new code.
refresh: clean requirements_dev version_file develop migrate
## Create Django superuser.
# Create Django superuser.
adduser:
$(MANAGEMENT_COMMAND) createsuperuser
## Create database tables and apply any new migrations.
# Create database tables and apply any new migrations.
migrate:
if [ "$(VENV_BASE)" ]; then \
. $(VENV_BASE)/awx/bin/activate; \
fi; \
$(MANAGEMENT_COMMAND) migrate --noinput
## Run after making changes to the models to create a new migration.
# Run after making changes to the models to create a new migration.
dbchange:
$(MANAGEMENT_COMMAND) makemigrations
@@ -218,7 +218,7 @@ wsbroadcast:
fi; \
$(PYTHON) manage.py run_wsbroadcast
## Run to start the background task dispatcher for development.
# Run to start the background task dispatcher for development.
dispatcher:
@if [ "$(VENV_BASE)" ]; then \
. $(VENV_BASE)/awx/bin/activate; \
@@ -226,7 +226,7 @@ dispatcher:
$(PYTHON) manage.py run_dispatcher
## Run to start the zeromq callback receiver
# Run to start the zeromq callback receiver
receiver:
@if [ "$(VENV_BASE)" ]; then \
. $(VENV_BASE)/awx/bin/activate; \
@@ -278,7 +278,7 @@ awx-link:
TEST_DIRS ?= awx/main/tests/unit awx/main/tests/functional awx/conf/tests awx/sso/tests
PYTEST_ARGS ?= -n auto
## Run all API unit tests.
# Run all API unit tests.
test:
if [ "$(VENV_BASE)" ]; then \
. $(VENV_BASE)/awx/bin/activate; \
@@ -341,24 +341,23 @@ test_unit:
fi; \
py.test awx/main/tests/unit awx/conf/tests/unit awx/sso/tests/unit
## Run all API unit tests with coverage enabled.
# Run all API unit tests with coverage enabled.
test_coverage:
@if [ "$(VENV_BASE)" ]; then \
. $(VENV_BASE)/awx/bin/activate; \
fi; \
py.test --create-db --cov=awx --cov-report=xml --junitxml=./reports/junit.xml $(TEST_DIRS)
## Output test coverage as HTML (into htmlcov directory).
# Output test coverage as HTML (into htmlcov directory).
coverage_html:
coverage html
## Run API unit tests across multiple Python/Django versions with Tox.
# Run API unit tests across multiple Python/Django versions with Tox.
test_tox:
tox -v
# Make fake data
DATA_GEN_PRESET = ""
## Make fake data
bulk_data:
@if [ "$(VENV_BASE)" ]; then \
. $(VENV_BASE)/awx/bin/activate; \
@@ -379,10 +378,9 @@ clean-ui:
rm -rf $(UI_BUILD_FLAG_FILE)
awx/ui/node_modules:
NODE_OPTIONS=--max-old-space-size=6144 $(NPM_BIN) --prefix awx/ui --loglevel warn --force ci
NODE_OPTIONS=--max-old-space-size=6144 $(NPM_BIN) --prefix awx/ui --loglevel warn ci
$(UI_BUILD_FLAG_FILE):
$(MAKE) awx/ui/node_modules
$(UI_BUILD_FLAG_FILE): awx/ui/node_modules
$(PYTHON) tools/scripts/compilemessages.py
$(NPM_BIN) --prefix awx/ui --loglevel warn run compile-strings
$(NPM_BIN) --prefix awx/ui --loglevel warn run build
@@ -453,11 +451,6 @@ COMPOSE_OPTS ?=
CONTROL_PLANE_NODE_COUNT ?= 1
EXECUTION_NODE_COUNT ?= 2
MINIKUBE_CONTAINER_GROUP ?= false
EXTRA_SOURCES_ANSIBLE_OPTS ?=
ifneq ($(ADMIN_PASSWORD),)
EXTRA_SOURCES_ANSIBLE_OPTS := -e admin_password=$(ADMIN_PASSWORD) $(EXTRA_SOURCES_ANSIBLE_OPTS)
endif
docker-compose-sources: .git/hooks/pre-commit
@if [ $(MINIKUBE_CONTAINER_GROUP) = true ]; then\
@@ -475,8 +468,7 @@ docker-compose-sources: .git/hooks/pre-commit
-e enable_ldap=$(LDAP) \
-e enable_splunk=$(SPLUNK) \
-e enable_prometheus=$(PROMETHEUS) \
-e enable_grafana=$(GRAFANA) $(EXTRA_SOURCES_ANSIBLE_OPTS)
-e enable_grafana=$(GRAFANA)
docker-compose: awx/projects docker-compose-sources
@@ -510,7 +502,7 @@ docker-compose-container-group-clean:
fi
rm -rf tools/docker-compose-minikube/_sources/
## Base development image build
# Base development image build
docker-compose-build:
ansible-playbook tools/ansible/dockerfile.yml -e build_dev=True -e receptor_image=$(RECEPTOR_IMAGE)
DOCKER_BUILDKIT=1 docker build -t $(DEVEL_IMAGE_NAME) \
@@ -528,7 +520,7 @@ docker-clean-volumes: docker-compose-clean docker-compose-container-group-clean
docker-refresh: docker-clean docker-compose
## Docker Development Environment with Elastic Stack Connected
# Docker Development Environment with Elastic Stack Connected
docker-compose-elk: awx/projects docker-compose-sources
docker-compose -f tools/docker-compose/_sources/docker-compose.yml -f tools/elastic/docker-compose.logstash-link.yml -f tools/elastic/docker-compose.elastic-override.yml up --no-recreate
@@ -565,34 +557,26 @@ Dockerfile.kube-dev: tools/ansible/roles/dockerfile/templates/Dockerfile.j2
-e template_dest=_build_kube_dev \
-e receptor_image=$(RECEPTOR_IMAGE)
## Build awx_kube_devel image for development on local Kubernetes environment.
awx-kube-dev-build: Dockerfile.kube-dev
DOCKER_BUILDKIT=1 docker build -f Dockerfile.kube-dev \
--build-arg BUILDKIT_INLINE_CACHE=1 \
--cache-from=$(DEV_DOCKER_TAG_BASE)/awx_kube_devel:$(COMPOSE_TAG) \
-t $(DEV_DOCKER_TAG_BASE)/awx_kube_devel:$(COMPOSE_TAG) .
## Build awx image for deployment on Kubernetes environment.
awx-kube-build: Dockerfile
DOCKER_BUILDKIT=1 docker build -f Dockerfile \
--build-arg VERSION=$(VERSION) \
--build-arg SETUPTOOLS_SCM_PRETEND_VERSION=$(VERSION) \
--build-arg HEADLESS=$(HEADLESS) \
-t $(DEV_DOCKER_TAG_BASE)/awx:$(COMPOSE_TAG) .
# Translation TASKS
# --------------------------------------
## generate UI .pot file, an empty template of strings yet to be translated
# generate UI .pot file, an empty template of strings yet to be translated
pot: $(UI_BUILD_FLAG_FILE)
$(NPM_BIN) --prefix awx/ui --loglevel warn run extract-template --clean
## generate UI .po files for each locale (will update translated strings for `en`)
# generate UI .po files for each locale (will update translated strings for `en`)
po: $(UI_BUILD_FLAG_FILE)
$(NPM_BIN) --prefix awx/ui --loglevel warn run extract-strings -- --clean
LANG = "en_us"
## generate API django .pot .po
# generate API django .pot .po
LANG = "en-us"
messages:
@if [ "$(VENV_BASE)" ]; then \
. $(VENV_BASE)/awx/bin/activate; \
@@ -601,38 +585,3 @@ messages:
print-%:
@echo $($*)
# HELP related targets
# --------------------------------------
HELP_FILTER=.PHONY
## Display help targets
help:
@printf "Available targets:\n"
@make -s help/generate | grep -vE "\w($(HELP_FILTER))"
## Display help for all targets
help/all:
@printf "Available targets:\n"
@make -s help/generate
## Generate help output from MAKEFILE_LIST
help/generate:
@awk '/^[-a-zA-Z_0-9%:\\\.\/]+:/ { \
helpMessage = match(lastLine, /^## (.*)/); \
if (helpMessage) { \
helpCommand = $$1; \
helpMessage = substr(lastLine, RSTART + 3, RLENGTH); \
gsub("\\\\", "", helpCommand); \
gsub(":+$$", "", helpCommand); \
printf " \x1b[32;01m%-35s\x1b[0m %s\n", helpCommand, helpMessage; \
} else { \
helpCommand = $$1; \
gsub("\\\\", "", helpCommand); \
gsub(":+$$", "", helpCommand); \
printf " \x1b[32;01m%-35s\x1b[0m %s\n", helpCommand, "No help available"; \
} \
} \
{ lastLine = $$0 }' $(MAKEFILE_LIST) | sort -u
@printf "\n"

View File

@@ -190,7 +190,7 @@ def manage():
sys.stdout.write('%s\n' % __version__)
# If running as a user without permission to read settings, display an
# error message. Allow --help to still work.
elif not os.getenv('SKIP_SECRET_KEY_CHECK', False) and settings.SECRET_KEY == 'permission-denied':
elif settings.SECRET_KEY == 'permission-denied':
if len(sys.argv) == 1 or len(sys.argv) >= 2 and sys.argv[1] in ('-h', '--help', 'help'):
execute_from_command_line(sys.argv)
sys.stdout.write('\n')

View File

@@ -157,7 +157,7 @@ class FieldLookupBackend(BaseFilterBackend):
# A list of fields that we know can be filtered on without the possibility
# of introducing duplicates
NO_DUPLICATES_ALLOW_LIST = (CharField, IntegerField, BooleanField, TextField)
NO_DUPLICATES_ALLOW_LIST = (CharField, IntegerField, BooleanField)
def get_fields_from_lookup(self, model, lookup):

View File

@@ -63,6 +63,7 @@ __all__ = [
'SubDetailAPIView',
'ResourceAccessList',
'ParentMixin',
'DeleteLastUnattachLabelMixin',
'SubListAttachDetachAPIView',
'CopyAPIView',
'BaseUsersList',
@@ -97,6 +98,7 @@ class LoggedLoginView(auth_views.LoginView):
current_user = UserSerializer(self.request.user)
current_user = smart_str(JSONRenderer().render(current_user.data))
current_user = urllib.parse.quote('%s' % current_user, '')
ret.set_cookie('current_user', current_user, secure=settings.SESSION_COOKIE_SECURE or None)
ret.setdefault('X-API-Session-Cookie-Name', getattr(settings, 'SESSION_COOKIE_NAME', 'awx_sessionid'))
return ret
@@ -773,6 +775,28 @@ class SubListAttachDetachAPIView(SubListCreateAttachDetachAPIView):
return {'id': None}
class DeleteLastUnattachLabelMixin(object):
"""
Models for which you want the last instance to be deleted from the database
when the last disassociate is called should inherit from this class. Further,
the model should implement is_detached()
"""
def unattach(self, request, *args, **kwargs):
(sub_id, res) = super(DeleteLastUnattachLabelMixin, self).unattach_validate(request)
if res:
return res
res = super(DeleteLastUnattachLabelMixin, self).unattach_by_id(request, sub_id)
obj = self.model.objects.get(id=sub_id)
if obj.is_detached():
obj.delete()
return res
class SubDetailAPIView(ParentMixin, generics.RetrieveAPIView, GenericAPIView):
pass

View File

@@ -154,7 +154,6 @@ SUMMARIZABLE_FK_FIELDS = {
'source_project': DEFAULT_SUMMARY_FIELDS + ('status', 'scm_type'),
'project_update': DEFAULT_SUMMARY_FIELDS + ('status', 'failed'),
'credential': DEFAULT_SUMMARY_FIELDS + ('kind', 'cloud', 'kubernetes', 'credential_type_id'),
'signature_validation_credential': DEFAULT_SUMMARY_FIELDS + ('kind', 'credential_type_id'),
'job': DEFAULT_SUMMARY_FIELDS + ('status', 'failed', 'elapsed', 'type', 'canceled_on'),
'job_template': DEFAULT_SUMMARY_FIELDS,
'workflow_job_template': DEFAULT_SUMMARY_FIELDS,
@@ -615,7 +614,7 @@ class BaseSerializer(serializers.ModelSerializer, metaclass=BaseSerializerMetacl
def validate(self, attrs):
attrs = super(BaseSerializer, self).validate(attrs)
try:
# Create/update a model instance and run its full_clean() method to
# Create/update a model instance and run it's full_clean() method to
# do any validation implemented on the model class.
exclusions = self.get_validation_exclusions(self.instance)
obj = self.instance or self.Meta.model()
@@ -1471,7 +1470,6 @@ class ProjectSerializer(UnifiedJobTemplateSerializer, ProjectOptionsSerializer):
'allow_override',
'custom_virtualenv',
'default_environment',
'signature_validation_credential',
) + (
'last_update_failed',
'last_updated',
@@ -1680,7 +1678,6 @@ class InventorySerializer(LabelsListMixin, BaseSerializerWithVariables):
'total_inventory_sources',
'inventory_sources_with_failures',
'pending_deletion',
'prevent_instance_group_fallback',
)
def get_related(self, obj):
@@ -2233,7 +2230,6 @@ class InventoryUpdateSerializer(UnifiedJobSerializer, InventorySourceOptionsSeri
'source_project_update',
'custom_virtualenv',
'instance_group',
'scm_revision',
)
def get_related(self, obj):
@@ -2924,12 +2920,6 @@ class JobTemplateSerializer(JobTemplateMixin, UnifiedJobTemplateSerializer, JobO
'ask_verbosity_on_launch',
'ask_inventory_on_launch',
'ask_credential_on_launch',
'ask_execution_environment_on_launch',
'ask_labels_on_launch',
'ask_forks_on_launch',
'ask_job_slice_count_on_launch',
'ask_timeout_on_launch',
'ask_instance_groups_on_launch',
'survey_enabled',
'become_enabled',
'diff_mode',
@@ -2938,7 +2928,6 @@ class JobTemplateSerializer(JobTemplateMixin, UnifiedJobTemplateSerializer, JobO
'job_slice_count',
'webhook_service',
'webhook_credential',
'prevent_instance_group_fallback',
)
read_only_fields = ('*', 'custom_virtualenv')
@@ -3193,7 +3182,7 @@ class JobRelaunchSerializer(BaseSerializer):
return attrs
class JobCreateScheduleSerializer(LabelsListMixin, BaseSerializer):
class JobCreateScheduleSerializer(BaseSerializer):
can_schedule = serializers.SerializerMethodField()
prompts = serializers.SerializerMethodField()
@@ -3219,17 +3208,14 @@ class JobCreateScheduleSerializer(LabelsListMixin, BaseSerializer):
try:
config = obj.launch_config
ret = config.prompts_dict(display=True)
for field_name in ('inventory', 'execution_environment'):
if field_name in ret:
ret[field_name] = self._summarize(field_name, ret[field_name])
for field_name, singular in (('credentials', 'credential'), ('instance_groups', 'instance_group')):
if field_name in ret:
ret[field_name] = [self._summarize(singular, obj) for obj in ret[field_name]]
if 'labels' in ret:
ret['labels'] = self._summary_field_labels(config)
if 'inventory' in ret:
ret['inventory'] = self._summarize('inventory', ret['inventory'])
if 'credentials' in ret:
all_creds = [self._summarize('credential', cred) for cred in ret['credentials']]
ret['credentials'] = all_creds
return ret
except JobLaunchConfig.DoesNotExist:
return {'all': _('Unknown, job may have been run before launch configurations were saved.')}
return {'all': _('Unknown, job may have been ran before launch configurations were saved.')}
class AdHocCommandSerializer(UnifiedJobSerializer):
@@ -3399,9 +3385,6 @@ class WorkflowJobTemplateSerializer(JobTemplateMixin, LabelsListMixin, UnifiedJo
limit = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
scm_branch = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
skip_tags = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
job_tags = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
class Meta:
model = WorkflowJobTemplate
fields = (
@@ -3420,11 +3403,6 @@ class WorkflowJobTemplateSerializer(JobTemplateMixin, LabelsListMixin, UnifiedJo
'webhook_service',
'webhook_credential',
'-execution_environment',
'ask_labels_on_launch',
'ask_skip_tags_on_launch',
'ask_tags_on_launch',
'skip_tags',
'job_tags',
)
def get_related(self, obj):
@@ -3468,7 +3446,7 @@ class WorkflowJobTemplateSerializer(JobTemplateMixin, LabelsListMixin, UnifiedJo
# process char_prompts, these are not direct fields on the model
mock_obj = self.Meta.model()
for field_name in ('scm_branch', 'limit', 'skip_tags', 'job_tags'):
for field_name in ('scm_branch', 'limit'):
if field_name in attrs:
setattr(mock_obj, field_name, attrs[field_name])
attrs.pop(field_name)
@@ -3494,9 +3472,6 @@ class WorkflowJobSerializer(LabelsListMixin, UnifiedJobSerializer):
limit = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
scm_branch = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
skip_tags = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
job_tags = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
class Meta:
model = WorkflowJob
fields = (
@@ -3516,8 +3491,6 @@ class WorkflowJobSerializer(LabelsListMixin, UnifiedJobSerializer):
'webhook_service',
'webhook_credential',
'webhook_guid',
'skip_tags',
'job_tags',
)
def get_related(self, obj):
@@ -3634,9 +3607,6 @@ class LaunchConfigurationBaseSerializer(BaseSerializer):
skip_tags = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
diff_mode = serializers.BooleanField(required=False, allow_null=True, default=None)
verbosity = serializers.ChoiceField(allow_null=True, required=False, default=None, choices=VERBOSITY_CHOICES)
forks = serializers.IntegerField(required=False, allow_null=True, min_value=0, default=None)
job_slice_count = serializers.IntegerField(required=False, allow_null=True, min_value=0, default=None)
timeout = serializers.IntegerField(required=False, allow_null=True, default=None)
exclude_errors = ()
class Meta:
@@ -3652,21 +3622,13 @@ class LaunchConfigurationBaseSerializer(BaseSerializer):
'skip_tags',
'diff_mode',
'verbosity',
'execution_environment',
'forks',
'job_slice_count',
'timeout',
)
def get_related(self, obj):
res = super(LaunchConfigurationBaseSerializer, self).get_related(obj)
if obj.inventory_id:
res['inventory'] = self.reverse('api:inventory_detail', kwargs={'pk': obj.inventory_id})
if obj.execution_environment_id:
res['execution_environment'] = self.reverse('api:execution_environment_detail', kwargs={'pk': obj.execution_environment_id})
res['labels'] = self.reverse('api:{}_labels_list'.format(get_type_for_model(self.Meta.model)), kwargs={'pk': obj.pk})
res['credentials'] = self.reverse('api:{}_credentials_list'.format(get_type_for_model(self.Meta.model)), kwargs={'pk': obj.pk})
res['instance_groups'] = self.reverse('api:{}_instance_groups_list'.format(get_type_for_model(self.Meta.model)), kwargs={'pk': obj.pk})
return res
def _build_mock_obj(self, attrs):
@@ -4118,6 +4080,7 @@ class SystemJobEventSerializer(AdHocCommandEventSerializer):
class JobLaunchSerializer(BaseSerializer):
# Representational fields
passwords_needed_to_start = serializers.ReadOnlyField()
can_start_without_user_input = serializers.BooleanField(read_only=True)
@@ -4140,12 +4103,6 @@ class JobLaunchSerializer(BaseSerializer):
skip_tags = serializers.CharField(required=False, write_only=True, allow_blank=True)
limit = serializers.CharField(required=False, write_only=True, allow_blank=True)
verbosity = serializers.ChoiceField(required=False, choices=VERBOSITY_CHOICES, write_only=True)
execution_environment = serializers.PrimaryKeyRelatedField(queryset=ExecutionEnvironment.objects.all(), required=False, write_only=True)
labels = serializers.PrimaryKeyRelatedField(many=True, queryset=Label.objects.all(), required=False, write_only=True)
forks = serializers.IntegerField(required=False, write_only=True, min_value=0)
job_slice_count = serializers.IntegerField(required=False, write_only=True, min_value=0)
timeout = serializers.IntegerField(required=False, write_only=True)
instance_groups = serializers.PrimaryKeyRelatedField(many=True, queryset=InstanceGroup.objects.all(), required=False, write_only=True)
class Meta:
model = JobTemplate
@@ -4173,12 +4130,6 @@ class JobLaunchSerializer(BaseSerializer):
'ask_verbosity_on_launch',
'ask_inventory_on_launch',
'ask_credential_on_launch',
'ask_execution_environment_on_launch',
'ask_labels_on_launch',
'ask_forks_on_launch',
'ask_job_slice_count_on_launch',
'ask_timeout_on_launch',
'ask_instance_groups_on_launch',
'survey_enabled',
'variables_needed_to_start',
'credential_needed_to_start',
@@ -4186,12 +4137,6 @@ class JobLaunchSerializer(BaseSerializer):
'job_template_data',
'defaults',
'verbosity',
'execution_environment',
'labels',
'forks',
'job_slice_count',
'timeout',
'instance_groups',
)
read_only_fields = (
'ask_scm_branch_on_launch',
@@ -4204,12 +4149,6 @@ class JobLaunchSerializer(BaseSerializer):
'ask_verbosity_on_launch',
'ask_inventory_on_launch',
'ask_credential_on_launch',
'ask_execution_environment_on_launch',
'ask_labels_on_launch',
'ask_forks_on_launch',
'ask_job_slice_count_on_launch',
'ask_timeout_on_launch',
'ask_instance_groups_on_launch',
)
def get_credential_needed_to_start(self, obj):
@@ -4234,17 +4173,6 @@ class JobLaunchSerializer(BaseSerializer):
if cred.credential_type.managed and 'vault_id' in cred.credential_type.defined_fields:
cred_dict['vault_id'] = cred.get_input('vault_id', default=None)
defaults_dict.setdefault(field_name, []).append(cred_dict)
elif field_name == 'execution_environment':
if obj.execution_environment_id:
defaults_dict[field_name] = {'id': obj.execution_environment.id, 'name': obj.execution_environment.name}
else:
defaults_dict[field_name] = {}
elif field_name == 'labels':
for label in obj.labels.all():
label_dict = {'id': label.id, 'name': label.name}
defaults_dict.setdefault(field_name, []).append(label_dict)
elif field_name == 'instance_groups':
defaults_dict[field_name] = []
else:
defaults_dict[field_name] = getattr(obj, field_name)
return defaults_dict
@@ -4267,15 +4195,6 @@ class JobLaunchSerializer(BaseSerializer):
elif template.project.status in ('error', 'failed'):
errors['playbook'] = _("Missing a revision to run due to failed project update.")
latest_update = template.project.project_updates.last()
if latest_update is not None and latest_update.failed:
failed_validation_tasks = latest_update.project_update_events.filter(
event='runner_on_failed',
play="Perform project signature/checksum verification",
)
if failed_validation_tasks:
errors['playbook'] = _("Last project update failed due to signature validation failure.")
# cannot run a playbook without an inventory
if template.inventory and template.inventory.pending_deletion is True:
errors['inventory'] = _("The inventory associated with this Job Template is being deleted.")
@@ -4352,10 +4271,6 @@ class WorkflowJobLaunchSerializer(BaseSerializer):
scm_branch = serializers.CharField(required=False, write_only=True, allow_blank=True)
workflow_job_template_data = serializers.SerializerMethodField()
labels = serializers.PrimaryKeyRelatedField(many=True, queryset=Label.objects.all(), required=False, write_only=True)
skip_tags = serializers.CharField(required=False, write_only=True, allow_blank=True)
job_tags = serializers.CharField(required=False, write_only=True, allow_blank=True)
class Meta:
model = WorkflowJobTemplate
fields = (
@@ -4375,22 +4290,8 @@ class WorkflowJobLaunchSerializer(BaseSerializer):
'workflow_job_template_data',
'survey_enabled',
'ask_variables_on_launch',
'ask_labels_on_launch',
'labels',
'ask_skip_tags_on_launch',
'ask_tags_on_launch',
'skip_tags',
'job_tags',
)
read_only_fields = (
'ask_inventory_on_launch',
'ask_variables_on_launch',
'ask_skip_tags_on_launch',
'ask_labels_on_launch',
'ask_limit_on_launch',
'ask_scm_branch_on_launch',
'ask_tags_on_launch',
)
read_only_fields = ('ask_inventory_on_launch', 'ask_variables_on_launch')
def get_survey_enabled(self, obj):
if obj:
@@ -4398,15 +4299,10 @@ class WorkflowJobLaunchSerializer(BaseSerializer):
return False
def get_defaults(self, obj):
defaults_dict = {}
for field_name in WorkflowJobTemplate.get_ask_mapping().keys():
if field_name == 'inventory':
defaults_dict[field_name] = dict(name=getattrd(obj, '%s.name' % field_name, None), id=getattrd(obj, '%s.pk' % field_name, None))
elif field_name == 'labels':
for label in obj.labels.all():
label_dict = {"id": label.id, "name": label.name}
defaults_dict.setdefault(field_name, []).append(label_dict)
else:
defaults_dict[field_name] = getattr(obj, field_name)
return defaults_dict
@@ -4415,7 +4311,6 @@ class WorkflowJobLaunchSerializer(BaseSerializer):
return dict(name=obj.name, id=obj.id, description=obj.description)
def validate(self, attrs):
template = self.instance
accepted, rejected, errors = template._accept_or_ignore_job_kwargs(**attrs)
@@ -4433,7 +4328,6 @@ class WorkflowJobLaunchSerializer(BaseSerializer):
WFJT_inventory = template.inventory
WFJT_limit = template.limit
WFJT_scm_branch = template.scm_branch
super(WorkflowJobLaunchSerializer, self).validate(attrs)
template.extra_vars = WFJT_extra_vars
template.inventory = WFJT_inventory
@@ -4825,8 +4719,6 @@ class ScheduleSerializer(LaunchConfigurationBaseSerializer, SchedulePreviewSeria
if isinstance(obj.unified_job_template, SystemJobTemplate):
summary_fields['unified_job_template']['job_type'] = obj.unified_job_template.job_type
# We are not showing instance groups on summary fields because JTs don't either
if 'inventory' in summary_fields:
return summary_fields
@@ -4874,62 +4766,49 @@ class InstanceNodeSerializer(BaseSerializer):
class InstanceSerializer(BaseSerializer):
show_capabilities = ['edit']
consumed_capacity = serializers.SerializerMethodField()
percent_capacity_remaining = serializers.SerializerMethodField()
jobs_running = serializers.IntegerField(help_text=_('Count of jobs in the running or waiting state that are targeted for this instance'), read_only=True)
jobs_total = serializers.IntegerField(help_text=_('Count of all jobs that target this instance'), read_only=True)
health_check_pending = serializers.SerializerMethodField()
class Meta:
model = Instance
read_only_fields = ('ip_address', 'uuid', 'version')
read_only_fields = ('uuid', 'hostname', 'version', 'node_type', 'node_state')
fields = (
'id',
'hostname',
'type',
'url',
'related',
'summary_fields',
'uuid',
'created',
'modified',
'last_seen',
'health_check_started',
'health_check_pending',
'last_health_check',
'errors',
"id",
"type",
"url",
"related",
"summary_fields",
"uuid",
"hostname",
"created",
"modified",
"last_seen",
"last_health_check",
"errors",
'capacity_adjustment',
'version',
'capacity',
'consumed_capacity',
'percent_capacity_remaining',
'jobs_running',
'jobs_total',
'cpu',
'memory',
'cpu_capacity',
'mem_capacity',
'enabled',
'managed_by_policy',
'node_type',
'node_state',
'ip_address',
'listener_port',
"version",
"capacity",
"consumed_capacity",
"percent_capacity_remaining",
"jobs_running",
"jobs_total",
"cpu",
"memory",
"cpu_capacity",
"mem_capacity",
"enabled",
"managed_by_policy",
"node_type",
"node_state",
)
extra_kwargs = {
'node_type': {'initial': Instance.Types.EXECUTION, 'default': Instance.Types.EXECUTION},
'node_state': {'initial': Instance.States.INSTALLED, 'default': Instance.States.INSTALLED},
}
def get_related(self, obj):
res = super(InstanceSerializer, self).get_related(obj)
res['jobs'] = self.reverse('api:instance_unified_jobs_list', kwargs={'pk': obj.pk})
res['instance_groups'] = self.reverse('api:instance_instance_groups_list', kwargs={'pk': obj.pk})
if settings.IS_K8S and obj.node_type in (Instance.Types.EXECUTION,):
res['install_bundle'] = self.reverse('api:instance_install_bundle', kwargs={'pk': obj.pk})
res['peers'] = self.reverse('api:instance_peers_list', kwargs={"pk": obj.pk})
if self.context['request'].user.is_superuser or self.context['request'].user.is_system_auditor:
if obj.node_type != 'hop':
res['health_check'] = self.reverse('api:instance_health_check', kwargs={'pk': obj.pk})
@@ -4938,7 +4817,6 @@ class InstanceSerializer(BaseSerializer):
def get_summary_fields(self, obj):
summary = super().get_summary_fields(obj)
# use this handle to distinguish between a listView and a detailView
if self.is_detail_view:
summary['links'] = InstanceLinkSerializer(InstanceLink.objects.select_related('target', 'source').filter(source=obj), many=True).data
@@ -4953,54 +4831,10 @@ class InstanceSerializer(BaseSerializer):
else:
return float("{0:.2f}".format(((float(obj.capacity) - float(obj.consumed_capacity)) / (float(obj.capacity))) * 100))
def get_health_check_pending(self, obj):
return obj.health_check_pending
def validate(self, data):
if self.instance:
if self.instance.node_type == Instance.Types.HOP:
raise serializers.ValidationError("Hop node instances may not be changed.")
else:
if not settings.IS_K8S:
raise serializers.ValidationError("Can only create instances on Kubernetes or OpenShift.")
return data
def validate_node_type(self, value):
if not self.instance:
if value not in (Instance.Types.EXECUTION,):
raise serializers.ValidationError("Can only create execution nodes.")
else:
if self.instance.node_type != value:
raise serializers.ValidationError("Cannot change node type.")
return value
def validate_node_state(self, value):
if self.instance:
if value != self.instance.node_state:
if not settings.IS_K8S:
raise serializers.ValidationError("Can only change the state on Kubernetes or OpenShift.")
if value != Instance.States.DEPROVISIONING:
raise serializers.ValidationError("Can only change instances to the 'deprovisioning' state.")
if self.instance.node_type not in (Instance.Types.EXECUTION,):
raise serializers.ValidationError("Can only deprovision execution nodes.")
else:
if value and value != Instance.States.INSTALLED:
raise serializers.ValidationError("Can only create instances in the 'installed' state.")
return value
def validate_hostname(self, value):
if self.instance and self.instance.hostname != value:
raise serializers.ValidationError("Cannot change hostname.")
return value
def validate_listener_port(self, value):
if self.instance and self.instance.listener_port != value:
raise serializers.ValidationError("Cannot change listener port.")
return value
def validate(self, attrs):
if self.instance.node_type == 'hop':
raise serializers.ValidationError(_('Hop node instances may not be changed.'))
return attrs
class InstanceHealthCheckSerializer(BaseSerializer):

View File

@@ -1,21 +0,0 @@
receptor_verify: true
receptor_tls: true
receptor_work_commands:
ansible-runner:
command: ansible-runner
params: worker
allowruntimeparams: true
verifysignature: true
custom_worksign_public_keyfile: receptor/work-public-key.pem
custom_tls_certfile: receptor/tls/receptor.crt
custom_tls_keyfile: receptor/tls/receptor.key
custom_ca_certfile: receptor/tls/ca/receptor-ca.crt
receptor_user: awx
receptor_group: awx
receptor_protocol: 'tcp'
receptor_listener: true
receptor_port: {{ instance.listener_port }}
receptor_dependencies:
- podman
- crun
- python39-pip

View File

@@ -1,18 +0,0 @@
{% verbatim %}
---
- hosts: all
become: yes
tasks:
- name: Create the receptor user
user:
name: "{{ receptor_user }}"
shell: /bin/bash
- name: Enable Copr repo for Receptor
command: dnf copr enable ansible-awx/receptor -y
- import_role:
name: ansible.receptor.setup
- name: Install ansible-runner
pip:
name: ansible-runner
executable: pip3.9
{% endverbatim %}

View File

@@ -1,7 +0,0 @@
---
all:
hosts:
remote-execution:
ansible_host: {{ instance.hostname }}
ansible_user: <username> # user provided
ansible_ssh_private_key_file: ~/.ssh/id_rsa

View File

@@ -1,6 +0,0 @@
---
collections:
- name: ansible.receptor
source: https://github.com/ansible/receptor-collection/
type: git
version: 0.1.1

View File

@@ -1,17 +0,0 @@
from django.urls import re_path
from awx.api.views.debug import (
DebugRootView,
TaskManagerDebugView,
DependencyManagerDebugView,
WorkflowManagerDebugView,
)
# Routes for the debug views imported above. The empty pattern serves the
# index view; the remaining patterns map one path segment to one manager view.
urls = [
    re_path(r'^$', DebugRootView.as_view(), name='debug'),
    re_path(r'^task_manager/$', TaskManagerDebugView.as_view(), name='task_manager'),
    re_path(r'^dependency_manager/$', DependencyManagerDebugView.as_view(), name='dependency_manager'),
    re_path(r'^workflow_manager/$', WorkflowManagerDebugView.as_view(), name='workflow_manager'),
]
# Only the route list is part of this module's public surface.
__all__ = ['urls']

View File

@@ -3,15 +3,7 @@
from django.urls import re_path
from awx.api.views import (
InstanceList,
InstanceDetail,
InstanceUnifiedJobsList,
InstanceInstanceGroupsList,
InstanceHealthCheck,
InstanceInstallBundle,
InstancePeersList,
)
from awx.api.views import InstanceList, InstanceDetail, InstanceUnifiedJobsList, InstanceInstanceGroupsList, InstanceHealthCheck
urls = [
@@ -20,8 +12,6 @@ urls = [
re_path(r'^(?P<pk>[0-9]+)/jobs/$', InstanceUnifiedJobsList.as_view(), name='instance_unified_jobs_list'),
re_path(r'^(?P<pk>[0-9]+)/instance_groups/$', InstanceInstanceGroupsList.as_view(), name='instance_instance_groups_list'),
re_path(r'^(?P<pk>[0-9]+)/health_check/$', InstanceHealthCheck.as_view(), name='instance_health_check'),
re_path(r'^(?P<pk>[0-9]+)/peers/$', InstancePeersList.as_view(), name='instance_peers_list'),
re_path(r'^(?P<pk>[0-9]+)/install_bundle/$', InstanceInstallBundle.as_view(), name='instance_install_bundle'),
]
__all__ = ['urls']

View File

@@ -3,7 +3,7 @@
from django.urls import re_path
from awx.api.views.labels import LabelList, LabelDetail
from awx.api.views import LabelList, LabelDetail
urls = [re_path(r'^$', LabelList.as_view(), name='label_list'), re_path(r'^(?P<pk>[0-9]+)/$', LabelDetail.as_view(), name='label_detail')]

View File

@@ -3,7 +3,7 @@
from django.urls import re_path
from awx.api.views import ScheduleList, ScheduleDetail, ScheduleUnifiedJobsList, ScheduleCredentialsList, ScheduleLabelsList, ScheduleInstanceGroupList
from awx.api.views import ScheduleList, ScheduleDetail, ScheduleUnifiedJobsList, ScheduleCredentialsList
urls = [
@@ -11,8 +11,6 @@ urls = [
re_path(r'^(?P<pk>[0-9]+)/$', ScheduleDetail.as_view(), name='schedule_detail'),
re_path(r'^(?P<pk>[0-9]+)/jobs/$', ScheduleUnifiedJobsList.as_view(), name='schedule_unified_jobs_list'),
re_path(r'^(?P<pk>[0-9]+)/credentials/$', ScheduleCredentialsList.as_view(), name='schedule_credentials_list'),
re_path(r'^(?P<pk>[0-9]+)/labels/$', ScheduleLabelsList.as_view(), name='schedule_labels_list'),
re_path(r'^(?P<pk>[0-9]+)/instance_groups/$', ScheduleInstanceGroupList.as_view(), name='schedule_instance_groups_list'),
]
__all__ = ['urls']

View File

@@ -2,9 +2,9 @@
# All Rights Reserved.
from __future__ import absolute_import, unicode_literals
from django.conf import settings
from django.urls import include, re_path
from awx import MODE
from awx.api.generics import LoggedLoginView, LoggedLogoutView
from awx.api.views import (
ApiRootView,
@@ -145,12 +145,7 @@ urlpatterns = [
re_path(r'^logout/$', LoggedLogoutView.as_view(next_page='/api/', redirect_field_name='next'), name='logout'),
re_path(r'^o/', include(oauth2_root_urls)),
]
if MODE == 'development':
# Only include these if we are in the development environment
if settings.SETTINGS_MODULE == 'awx.settings.development':
from awx.api.swagger import SwaggerSchemaView
urlpatterns += [re_path(r'^swagger/$', SwaggerSchemaView.as_view(), name='swagger_view')]
from awx.api.urls.debug import urls as debug_urls
urlpatterns += [re_path(r'^debug/', include(debug_urls))]

View File

@@ -10,8 +10,6 @@ from awx.api.views import (
WorkflowJobNodeFailureNodesList,
WorkflowJobNodeAlwaysNodesList,
WorkflowJobNodeCredentialsList,
WorkflowJobNodeLabelsList,
WorkflowJobNodeInstanceGroupsList,
)
@@ -22,8 +20,6 @@ urls = [
re_path(r'^(?P<pk>[0-9]+)/failure_nodes/$', WorkflowJobNodeFailureNodesList.as_view(), name='workflow_job_node_failure_nodes_list'),
re_path(r'^(?P<pk>[0-9]+)/always_nodes/$', WorkflowJobNodeAlwaysNodesList.as_view(), name='workflow_job_node_always_nodes_list'),
re_path(r'^(?P<pk>[0-9]+)/credentials/$', WorkflowJobNodeCredentialsList.as_view(), name='workflow_job_node_credentials_list'),
re_path(r'^(?P<pk>[0-9]+)/labels/$', WorkflowJobNodeLabelsList.as_view(), name='workflow_job_node_labels_list'),
re_path(r'^(?P<pk>[0-9]+)/instance_groups/$', WorkflowJobNodeInstanceGroupsList.as_view(), name='workflow_job_node_instance_groups_list'),
]
__all__ = ['urls']

View File

@@ -11,8 +11,6 @@ from awx.api.views import (
WorkflowJobTemplateNodeAlwaysNodesList,
WorkflowJobTemplateNodeCredentialsList,
WorkflowJobTemplateNodeCreateApproval,
WorkflowJobTemplateNodeLabelsList,
WorkflowJobTemplateNodeInstanceGroupsList,
)
@@ -23,8 +21,6 @@ urls = [
re_path(r'^(?P<pk>[0-9]+)/failure_nodes/$', WorkflowJobTemplateNodeFailureNodesList.as_view(), name='workflow_job_template_node_failure_nodes_list'),
re_path(r'^(?P<pk>[0-9]+)/always_nodes/$', WorkflowJobTemplateNodeAlwaysNodesList.as_view(), name='workflow_job_template_node_always_nodes_list'),
re_path(r'^(?P<pk>[0-9]+)/credentials/$', WorkflowJobTemplateNodeCredentialsList.as_view(), name='workflow_job_template_node_credentials_list'),
re_path(r'^(?P<pk>[0-9]+)/labels/$', WorkflowJobTemplateNodeLabelsList.as_view(), name='workflow_job_template_node_labels_list'),
re_path(r'^(?P<pk>[0-9]+)/instance_groups/$', WorkflowJobTemplateNodeInstanceGroupsList.as_view(), name='workflow_job_template_node_instance_groups_list'),
re_path(r'^(?P<pk>[0-9]+)/create_approval_template/$', WorkflowJobTemplateNodeCreateApproval.as_view(), name='workflow_job_template_node_create_approval'),
]

View File

@@ -22,7 +22,6 @@ from django.conf import settings
from django.core.exceptions import FieldError, ObjectDoesNotExist
from django.db.models import Q, Sum
from django.db import IntegrityError, ProgrammingError, transaction, connection
from django.db.models.fields.related import ManyToManyField, ForeignKey
from django.shortcuts import get_object_or_404
from django.utils.safestring import mark_safe
from django.utils.timezone import now
@@ -69,6 +68,7 @@ from awx.api.generics import (
APIView,
BaseUsersList,
CopyAPIView,
DeleteLastUnattachLabelMixin,
GenericAPIView,
ListAPIView,
ListCreateAPIView,
@@ -85,7 +85,6 @@ from awx.api.generics import (
SubListCreateAttachDetachAPIView,
SubListDestroyAPIView,
)
from awx.api.views.labels import LabelSubListCreateAttachDetachView
from awx.api.versioning import reverse
from awx.main import models
from awx.main.utils import (
@@ -94,7 +93,7 @@ from awx.main.utils import (
get_object_or_400,
getattrd,
get_pk_from_dict,
ScheduleWorkflowManager,
schedule_task_manager,
ignore_inventory_computed_fields,
)
from awx.main.utils.encryption import encrypt_value
@@ -122,22 +121,6 @@ from awx.api.views.mixin import (
UnifiedJobDeletionMixin,
NoTruncateMixin,
)
from awx.api.views.instance_install_bundle import InstanceInstallBundle # noqa
from awx.api.views.inventory import ( # noqa
InventoryList,
InventoryDetail,
InventoryUpdateEventsList,
InventoryList,
InventoryDetail,
InventoryActivityStreamList,
InventoryInstanceGroupsList,
InventoryAccessList,
InventoryObjectRolesList,
InventoryJobTemplateList,
InventoryLabelList,
InventoryCopy,
)
from awx.api.views.mesh_visualizer import MeshVisualizer # noqa
from awx.api.views.organization import ( # noqa
OrganizationList,
OrganizationDetail,
@@ -161,6 +144,21 @@ from awx.api.views.organization import ( # noqa
OrganizationAccessList,
OrganizationObjectRolesList,
)
from awx.api.views.inventory import ( # noqa
InventoryList,
InventoryDetail,
InventoryUpdateEventsList,
InventoryList,
InventoryDetail,
InventoryActivityStreamList,
InventoryInstanceGroupsList,
InventoryAccessList,
InventoryObjectRolesList,
InventoryJobTemplateList,
InventoryLabelList,
InventoryCopy,
)
from awx.api.views.mesh_visualizer import MeshVisualizer # noqa
from awx.api.views.root import ( # noqa
ApiRootView,
ApiOAuthAuthorizationRootView,
@@ -175,6 +173,7 @@ from awx.api.views.webhooks import WebhookKeyView, GithubWebhookReceiver, Gitlab
from awx.api.pagination import UnifiedJobEventPagination
from awx.main.utils import set_environ
logger = logging.getLogger('awx.api.views')
@@ -359,7 +358,7 @@ class DashboardJobsGraphView(APIView):
return Response(dashboard_data)
class InstanceList(ListCreateAPIView):
class InstanceList(ListAPIView):
name = _("Instances")
model = models.Instance
@@ -398,17 +397,6 @@ class InstanceUnifiedJobsList(SubListAPIView):
return qs
class InstancePeersList(SubListAPIView):
    # Sub-list of Instance objects reached through the parent Instance's
    # `peers` relationship; the parent is checked for 'read' access.

    name = _("Instance Peers")

    # Parent and child are both Instance rows.
    parent_model = models.Instance
    model = models.Instance
    relationship = 'peers'
    parent_access = 'read'

    serializer_class = serializers.InstanceSerializer
    search_fields = {'hostname'}
class InstanceInstanceGroupsList(InstanceGroupMembershipMixin, SubListCreateAttachDetachAPIView):
name = _("Instance's Instance Groups")
@@ -451,21 +439,41 @@ class InstanceHealthCheck(GenericAPIView):
def post(self, request, *args, **kwargs):
obj = self.get_object()
if obj.health_check_pending:
return Response({'msg': f"Health check was already in progress for {obj.hostname}."}, status=status.HTTP_200_OK)
# Note: hop nodes are already excluded by the get_queryset method
obj.health_check_started = now()
obj.save(update_fields=['health_check_started'])
if obj.node_type == models.Instance.Types.EXECUTION:
if obj.node_type == 'execution':
from awx.main.tasks.system import execution_node_health_check
execution_node_health_check.apply_async([obj.hostname])
runner_data = execution_node_health_check(obj.hostname)
obj.refresh_from_db()
data = self.get_serializer(data=request.data).to_representation(obj)
# Add in some extra unsaved fields
for extra_field in ('transmit_timing', 'run_timing'):
if extra_field in runner_data:
data[extra_field] = runner_data[extra_field]
else:
from awx.main.tasks.system import cluster_node_health_check
cluster_node_health_check.apply_async([obj.hostname], queue=obj.hostname)
return Response({'msg': f"Health check is running for {obj.hostname}."}, status=status.HTTP_200_OK)
if settings.CLUSTER_HOST_ID == obj.hostname:
cluster_node_health_check(obj.hostname)
else:
cluster_node_health_check.apply_async([obj.hostname], queue=obj.hostname)
start_time = time.time()
prior_check_time = obj.last_health_check
while time.time() - start_time < 50.0:
obj.refresh_from_db(fields=['last_health_check'])
if obj.last_health_check != prior_check_time:
break
if time.time() - start_time < 1.0:
time.sleep(0.1)
else:
time.sleep(1.0)
else:
obj.mark_offline(errors=_('Health check initiated by user determined this instance to be unresponsive'))
obj.refresh_from_db()
data = self.get_serializer(data=request.data).to_representation(obj)
return Response(data, status=status.HTTP_200_OK)
class InstanceGroupList(ListCreateAPIView):
@@ -610,19 +618,6 @@ class ScheduleCredentialsList(LaunchConfigCredentialsBase):
parent_model = models.Schedule
class ScheduleLabelsList(LabelSubListCreateAttachDetachView):
    # Label sub-list for a Schedule; everything except the parent model is
    # inherited from LabelSubListCreateAttachDetachView.

    parent_model = models.Schedule
class ScheduleInstanceGroupList(SubListAttachDetachAPIView):
    # Sub-list of the InstanceGroup objects related to a Schedule through
    # its `instance_groups` relationship.

    parent_model = models.Schedule
    relationship = 'instance_groups'

    model = models.InstanceGroup
    serializer_class = serializers.InstanceGroupSerializer
class ScheduleUnifiedJobsList(SubListAPIView):
model = models.UnifiedJob
@@ -2387,13 +2382,10 @@ class JobTemplateLaunch(RetrieveAPIView):
for field, ask_field_name in modified_ask_mapping.items():
if not getattr(obj, ask_field_name):
data.pop(field, None)
elif isinstance(getattr(obj.__class__, field).field, ForeignKey):
elif field == 'inventory':
data[field] = getattrd(obj, "%s.%s" % (field, 'id'), None)
elif isinstance(getattr(obj.__class__, field).field, ManyToManyField):
if field == 'instance_groups':
data[field] = []
continue
data[field] = [item.id for item in getattr(obj, field).all()]
elif field == 'credentials':
data[field] = [cred.id for cred in obj.credentials.all()]
else:
data[field] = getattr(obj, field)
return data
@@ -2406,8 +2398,9 @@ class JobTemplateLaunch(RetrieveAPIView):
"""
modern_data = data.copy()
if 'inventory' not in modern_data and 'inventory_id' in modern_data:
modern_data['inventory'] = modern_data['inventory_id']
id_fd = '{}_id'.format('inventory')
if 'inventory' not in modern_data and id_fd in modern_data:
modern_data['inventory'] = modern_data[id_fd]
# credential passwords were historically provided as top-level attributes
if 'credential_passwords' not in modern_data:
@@ -2727,9 +2720,28 @@ class JobTemplateCredentialsList(SubListCreateAttachDetachAPIView):
return super(JobTemplateCredentialsList, self).is_valid_relation(parent, sub, created)
class JobTemplateLabelList(LabelSubListCreateAttachDetachView):
class JobTemplateLabelList(DeleteLastUnattachLabelMixin, SubListCreateAttachDetachAPIView):
model = models.Label
serializer_class = serializers.LabelSerializer
parent_model = models.JobTemplate
relationship = 'labels'
def post(self, request, *args, **kwargs):
    """Attach a label to the parent template, reusing an existing label when possible.

    When the payload has no ``id`` but carries both ``name`` and
    ``organization``, and a Label with that name already exists in that
    organization, the payload is rewritten in place to reference the existing
    label by ``id``. The request is then handed to the parent class.

    Returns a 400 response when more than 100 labels are already related to
    the template identified by the ``pk`` URL kwarg.
    """
    # If a label already exists in the database, attach it instead of erroring out
    # that it already exists
    if 'id' not in request.data and 'name' in request.data and 'organization' in request.data:
        # .first() fetches the row (or None) in a single query.
        existing = models.Label.objects.filter(name=request.data['name'], organization_id=request.data['organization']).first()
        if existing is not None:
            request.data['id'] = existing.id
            del request.data['name']
            del request.data['organization']

    if models.Label.objects.filter(unifiedjobtemplate_labels=self.kwargs['pk']).count() > 100:
        # Translate the template first and interpolate afterwards; formatting
        # before the lookup produces a msgid no translation catalogue contains.
        msg = _('Maximum number of labels for {} reached.').format(self.parent_model._meta.verbose_name_raw)
        return Response(dict(msg=msg), status=status.HTTP_400_BAD_REQUEST)

    return super(JobTemplateLabelList, self).post(request, *args, **kwargs)
class JobTemplateCallback(GenericAPIView):
@@ -2955,22 +2967,6 @@ class WorkflowJobNodeCredentialsList(SubListAPIView):
relationship = 'credentials'
class WorkflowJobNodeLabelsList(SubListAPIView):
    # Sub-list of the Label objects related to a WorkflowJobNode through its
    # `labels` relationship.

    parent_model = models.WorkflowJobNode
    relationship = 'labels'

    model = models.Label
    serializer_class = serializers.LabelSerializer
class WorkflowJobNodeInstanceGroupsList(SubListAttachDetachAPIView):
    # Sub-list of the InstanceGroup objects related to a WorkflowJobNode
    # through its `instance_groups` relationship.

    parent_model = models.WorkflowJobNode
    relationship = 'instance_groups'

    model = models.InstanceGroup
    serializer_class = serializers.InstanceGroupSerializer
class WorkflowJobTemplateNodeList(ListCreateAPIView):
model = models.WorkflowJobTemplateNode
@@ -2989,19 +2985,6 @@ class WorkflowJobTemplateNodeCredentialsList(LaunchConfigCredentialsBase):
parent_model = models.WorkflowJobTemplateNode
class WorkflowJobTemplateNodeLabelsList(LabelSubListCreateAttachDetachView):
    # Label sub-list for a WorkflowJobTemplateNode; everything except the
    # parent model is inherited from LabelSubListCreateAttachDetachView.

    parent_model = models.WorkflowJobTemplateNode
class WorkflowJobTemplateNodeInstanceGroupsList(SubListAttachDetachAPIView):
    # Sub-list of the InstanceGroup objects related to a
    # WorkflowJobTemplateNode through its `instance_groups` relationship.

    parent_model = models.WorkflowJobTemplateNode
    relationship = 'instance_groups'

    model = models.InstanceGroup
    serializer_class = serializers.InstanceGroupSerializer
class WorkflowJobTemplateNodeChildrenBaseList(EnforceParentRelationshipMixin, SubListCreateAttachDetachAPIView):
model = models.WorkflowJobTemplateNode
@@ -3214,17 +3197,13 @@ class WorkflowJobTemplateLaunch(RetrieveAPIView):
data['extra_vars'] = extra_vars
modified_ask_mapping = models.WorkflowJobTemplate.get_ask_mapping()
modified_ask_mapping.pop('extra_vars')
for field, ask_field_name in modified_ask_mapping.items():
for field_name, ask_field_name in obj.get_ask_mapping().items():
if not getattr(obj, ask_field_name):
data.pop(field, None)
elif isinstance(getattr(obj.__class__, field).field, ForeignKey):
data[field] = getattrd(obj, "%s.%s" % (field, 'id'), None)
elif isinstance(getattr(obj.__class__, field).field, ManyToManyField):
data[field] = [item.id for item in getattr(obj, field).all()]
data.pop(field_name, None)
elif field_name == 'inventory':
data[field_name] = getattrd(obj, "%s.%s" % (field_name, 'id'), None)
else:
data[field] = getattr(obj, field)
data[field_name] = getattr(obj, field_name)
return data
def post(self, request, *args, **kwargs):
@@ -3413,7 +3392,7 @@ class WorkflowJobCancel(RetrieveAPIView):
obj = self.get_object()
if obj.can_cancel:
obj.cancel()
ScheduleWorkflowManager().schedule()
schedule_task_manager()
return Response(status=status.HTTP_202_ACCEPTED)
else:
return self.http_method_not_allowed(request, *args, **kwargs)
@@ -3711,21 +3690,15 @@ class JobCreateSchedule(RetrieveAPIView):
extra_data=config.extra_data,
survey_passwords=config.survey_passwords,
inventory=config.inventory,
execution_environment=config.execution_environment,
char_prompts=config.char_prompts,
credentials=set(config.credentials.all()),
labels=set(config.labels.all()),
instance_groups=list(config.instance_groups.all()),
)
if not request.user.can_access(models.Schedule, 'add', schedule_data):
raise PermissionDenied()
related_fields = ('credentials', 'labels', 'instance_groups')
related = [schedule_data.pop(relationship) for relationship in related_fields]
creds_list = schedule_data.pop('credentials')
schedule = models.Schedule.objects.create(**schedule_data)
for relationship, items in zip(related_fields, related):
for item in items:
getattr(schedule, relationship).add(item)
schedule.credentials.add(*creds_list)
data = serializers.ScheduleSerializer(schedule, context=self.get_serializer_context()).data
data.serializer.instance = None # hack to avoid permissions.py assuming this is Job model
@@ -3867,7 +3840,7 @@ class JobJobEventsList(BaseJobEventsList):
def get_queryset(self):
job = self.get_parent_object()
self.check_parent_access(job)
return job.get_event_queryset().prefetch_related('job__job_template', 'host').order_by('start_line')
return job.get_event_queryset().select_related('host').order_by('start_line')
class JobJobEventsChildrenSummary(APIView):
@@ -4456,6 +4429,18 @@ class NotificationDetail(RetrieveAPIView):
serializer_class = serializers.NotificationSerializer
class LabelList(ListCreateAPIView):
    # Collection endpoint for Label objects, serialized with LabelSerializer.

    serializer_class = serializers.LabelSerializer
    model = models.Label
class LabelDetail(RetrieveUpdateAPIView):
    # Single-object endpoint for a Label, serialized with LabelSerializer.

    serializer_class = serializers.LabelSerializer
    model = models.Label
class ActivityStreamList(SimpleListAPIView):
model = models.ActivityStream

View File

@@ -1,68 +0,0 @@
from collections import OrderedDict
from django.conf import settings
from rest_framework.permissions import AllowAny
from rest_framework.response import Response
from awx.api.generics import APIView
from awx.main.scheduler import TaskManager, DependencyManager, WorkflowManager
class TaskManagerDebugView(APIView):
    # Debug endpoint: every GET calls TaskManager().schedule() and replies
    # with a sentence that depends on settings.AWX_DISABLE_TASK_MANAGERS.
    # Access is unrestricted (AllowAny) and the view is kept out of the schema.

    _ignore_model_permissions = True
    exclude_from_schema = True
    permission_classes = [AllowAny]
    prefix = 'Task'

    def get(self, request):
        # Trigger the manager first; the setting only affects the reply text.
        TaskManager().schedule()

        if settings.AWX_DISABLE_TASK_MANAGERS:
            msg = f"AWX_DISABLE_TASK_MANAGERS is True, this view is the only way to trigger the {self.prefix} manager"
        else:
            msg = f"Running {self.prefix} manager. To disable other triggers to the {self.prefix} manager, set AWX_DISABLE_TASK_MANAGERS to True"

        return Response(msg)
class DependencyManagerDebugView(APIView):
    # Debug endpoint: every GET calls DependencyManager().schedule() and
    # replies with a sentence that depends on
    # settings.AWX_DISABLE_TASK_MANAGERS. Access is unrestricted (AllowAny)
    # and the view is kept out of the schema.

    _ignore_model_permissions = True
    exclude_from_schema = True
    permission_classes = [AllowAny]
    prefix = 'Dependency'

    def get(self, request):
        # Trigger the manager first; the setting only affects the reply text.
        DependencyManager().schedule()

        if settings.AWX_DISABLE_TASK_MANAGERS:
            msg = f"AWX_DISABLE_TASK_MANAGERS is True, this view is the only way to trigger the {self.prefix} manager"
        else:
            msg = f"Running {self.prefix} manager. To disable other triggers to the {self.prefix} manager, set AWX_DISABLE_TASK_MANAGERS to True"

        return Response(msg)
class WorkflowManagerDebugView(APIView):
    # Debug endpoint: every GET calls WorkflowManager().schedule() and replies
    # with a sentence that depends on settings.AWX_DISABLE_TASK_MANAGERS.
    # Access is unrestricted (AllowAny) and the view is kept out of the schema.

    _ignore_model_permissions = True
    exclude_from_schema = True
    permission_classes = [AllowAny]
    prefix = 'Workflow'

    def get(self, request):
        # Trigger the manager first; the setting only affects the reply text.
        WorkflowManager().schedule()

        if settings.AWX_DISABLE_TASK_MANAGERS:
            msg = f"AWX_DISABLE_TASK_MANAGERS is True, this view is the only way to trigger the {self.prefix} manager"
        else:
            msg = f"Running {self.prefix} manager. To disable other triggers to the {self.prefix} manager, set AWX_DISABLE_TASK_MANAGERS to True"

        return Response(msg)
class DebugRootView(APIView):
    # Index of the debug endpoints. Access is unrestricted (AllowAny) and the
    # view is kept out of the schema.

    _ignore_model_permissions = True
    exclude_from_schema = True
    permission_classes = [AllowAny]

    def get(self, request, format=None):
        '''List of available debug urls'''
        # Insertion order is the order the entries appear in the response.
        debug_urls = OrderedDict(
            [
                ('task_manager', '/api/debug/task_manager/'),
                ('dependency_manager', '/api/debug/dependency_manager/'),
                ('workflow_manager', '/api/debug/workflow_manager/'),
            ]
        )
        return Response(debug_urls)

View File

@@ -1,199 +0,0 @@
# Copyright (c) 2018 Red Hat, Inc.
# All Rights Reserved.
import datetime
import io
import ipaddress
import os
import tarfile
import asn1
from awx.api import serializers
from awx.api.generics import GenericAPIView, Response
from awx.api.permissions import IsSystemAdminOrAuditor
from awx.main import models
from cryptography import x509
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.x509 import DNSName, IPAddress, ObjectIdentifier, OtherName
from cryptography.x509.oid import NameOID
from django.http import HttpResponse
from django.template.loader import render_to_string
from django.utils.translation import gettext_lazy as _
from rest_framework import status
# Red Hat has an OID namespace (RHANANA). Receptor has its own designation under that.
RECEPTOR_OID = "1.3.6.1.4.1.2312.19.1"
# generate install bundle for the instance
# install bundle directory structure
# ├── install_receptor.yml (playbook)
# ├── inventory.yml
# ├── group_vars
# │ └── all.yml
# ├── receptor
# │ ├── tls
# │ │ ├── ca
# │ │ │ └── receptor-ca.crt
# │ │ ├── receptor.crt
# │ │ └── receptor.key
# │ └── work-public-key.pem
# └── requirements.yml
class InstanceInstallBundle(GenericAPIView):
    # Serves a gzip-compressed tar archive ("install bundle") for one
    # instance. The archive holds the receptor CA certificate and work-signing
    # public key copied from /etc/receptor, a freshly generated TLS key and
    # certificate, and the rendered playbook, inventory, group_vars and
    # requirements files.

    name = _('Install Bundle')
    model = models.Instance
    serializer_class = serializers.InstanceSerializer
    permission_classes = (IsSystemAdminOrAuditor,)

    def get(self, request, *args, **kwargs):
        instance_obj = self.get_object()

        # Bundles are only produced for execution nodes.
        if instance_obj.node_type not in ('execution',):
            return Response(
                data=dict(msg=_('Install bundle can only be generated for execution nodes.')),
                status=status.HTTP_400_BAD_REQUEST,
            )

        # Every archive member lives under this top-level directory.
        bundle_root = f"{instance_obj.hostname}_install_bundle"

        with io.BytesIO() as buffer:
            with tarfile.open(fileobj=buffer, mode='w:gz') as tar:

                def add_generated(relative_path, payload):
                    # Store in-memory bytes as a member; TarInfo needs the
                    # size set explicitly before addfile() copies the data.
                    member = tarfile.TarInfo(f"{bundle_root}/{relative_path}")
                    member.size = len(payload)
                    tar.addfile(member, io.BytesIO(payload))

                # Files copied from disk: the receptor CA certificate (resolved
                # through realpath) and the work-signing public key.
                tar.add(
                    os.path.realpath('/etc/receptor/tls/ca/receptor-ca.crt'),
                    arcname=f"{bundle_root}/receptor/tls/ca/receptor-ca.crt",
                )
                tar.add('/etc/receptor/signing/work-public-key.pem', arcname=f"{bundle_root}/receptor/work-public-key.pem")

                # Per-instance TLS key and certificate.
                key, cert = generate_receptor_tls(instance_obj)
                add_generated("receptor/tls/receptor.key", key)
                add_generated("receptor/tls/receptor.crt", cert)

                # Rendered templates, encoded to bytes before being stored.
                add_generated("install_receptor.yml", generate_playbook().encode('utf-8'))
                add_generated("inventory.yml", generate_inventory_yml(instance_obj).encode('utf-8'))
                add_generated("group_vars/all.yml", generate_group_vars_all_yml(instance_obj).encode('utf-8'))
                add_generated("requirements.yml", generate_requirements_yml().encode('utf-8'))

            # The tar stream is closed at this point, so the buffer holds the
            # complete archive.
            response = HttpResponse(buffer.getvalue(), status=status.HTTP_200_OK)
            response['Content-Disposition'] = f"attachment; filename={instance_obj.hostname}_install_bundle.tar.gz"
            return response
def generate_playbook():
    """Render the install_receptor.yml bundle template and return the rendered text."""
    template_name = "instance_install_bundle/install_receptor.yml"
    return render_to_string(template_name)
def generate_requirements_yml():
    """Render the requirements.yml bundle template and return the rendered text."""
    template_name = "instance_install_bundle/requirements.yml"
    return render_to_string(template_name)
def generate_inventory_yml(instance_obj):
    """Render the inventory.yml bundle template for the given instance.

    The instance is exposed to the template under the name ``instance``.
    """
    template_context = {'instance': instance_obj}
    return render_to_string("instance_install_bundle/inventory.yml", context=template_context)
def generate_group_vars_all_yml(instance_obj):
    """Render the group_vars/all.yml bundle template for the given instance.

    The instance is exposed to the template under the name ``instance``.
    """
    template_context = {'instance': instance_obj}
    return render_to_string("instance_install_bundle/group_vars/all.yml", context=template_context)
def generate_receptor_tls(instance_obj):
    """Create a TLS private key and a CA-signed certificate for a receptor node.

    A fresh 2048-bit RSA key is generated, a CSR is built for
    ``instance_obj.hostname`` and that CSR is signed with the receptor CA
    key/certificate read from ``/etc/receptor/tls/ca/``.  The certificate is
    valid for 10 days starting at the moment of the call.

    The subject alternative names always contain the hostname as a DNS name
    and as a receptor ``OtherName`` entry; when the hostname is an IP address
    literal (IPv4 or IPv6) a matching IP address entry is added as well.

    Args:
        instance_obj: object exposing the node's ``hostname`` attribute.

    Returns:
        A ``(key, cert)`` tuple of PEM-encoded bytes: the unencrypted private
        key (TraditionalOpenSSL format) and the signed certificate.

    Raises:
        OSError: if the CA key or certificate file cannot be read.
    """
    hostname = instance_obj.hostname

    # generate private key for the receptor
    private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)

    # encode receptor hostname to asn1 (UTF8String) for the OtherName SAN entry
    encoder = asn1.Encoder()
    encoder.start()
    encoder.write(hostname.encode(), nr=asn1.Numbers.UTF8String)
    hostname_asn1 = encoder.output()

    san_params = [
        DNSName(hostname),
        OtherName(ObjectIdentifier(RECEPTOR_OID), hostname_asn1),
    ]

    # A hostname that is an IP literal also gets an IP SAN.  ip_address()
    # accepts both IPv4 and IPv6 and raises ValueError for anything else
    # (ipaddress.AddressValueError is a ValueError subclass).
    try:
        san_params.append(IPAddress(ipaddress.ip_address(hostname)))
    except ValueError:
        pass

    # generate the certificate signing request for the receptor
    csr = (
        x509.CertificateSigningRequestBuilder()
        .subject_name(
            x509.Name(
                [
                    x509.NameAttribute(NameOID.COMMON_NAME, hostname),
                ]
            )
        )
        .add_extension(
            x509.SubjectAlternativeName(san_params),
            critical=False,
        )
        .sign(private_key, hashes.SHA256())
    )

    # sign csr with the receptor ca key from /etc/receptor/tls/ca/receptor-ca.key
    with open('/etc/receptor/tls/ca/receptor-ca.key', 'rb') as f:
        ca_key = serialization.load_pem_private_key(
            f.read(),
            password=None,
        )

    with open('/etc/receptor/tls/ca/receptor-ca.crt', 'rb') as f:
        ca_cert = x509.load_pem_x509_certificate(f.read())

    # Sample the clock once so both ends of the validity window share one origin.
    not_before = datetime.datetime.utcnow()
    not_after = not_before + datetime.timedelta(days=10)

    # Carry the SAN extension over from the CSR with its original criticality.
    san_extension = csr.extensions.get_extension_for_class(x509.SubjectAlternativeName)

    signed_cert = (
        x509.CertificateBuilder()
        .subject_name(csr.subject)
        .issuer_name(ca_cert.issuer)
        .public_key(csr.public_key())
        .serial_number(x509.random_serial_number())
        .not_valid_before(not_before)
        .not_valid_after(not_after)
        .add_extension(
            san_extension.value,
            critical=san_extension.critical,
        )
        .sign(ca_key, hashes.SHA256())
    )

    key_pem = private_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.TraditionalOpenSSL,
        encryption_algorithm=serialization.NoEncryption(),
    )

    cert_pem = signed_cert.public_bytes(
        encoding=serialization.Encoding.PEM,
    )

    return key_pem, cert_pem

View File

@@ -18,6 +18,8 @@ from rest_framework import status
# AWX
from awx.main.models import ActivityStream, Inventory, JobTemplate, Role, User, InstanceGroup, InventoryUpdateEvent, InventoryUpdate
from awx.main.models.label import Label
from awx.api.generics import (
ListCreateAPIView,
RetrieveUpdateDestroyAPIView,
@@ -25,8 +27,9 @@ from awx.api.generics import (
SubListAttachDetachAPIView,
ResourceAccessList,
CopyAPIView,
DeleteLastUnattachLabelMixin,
SubListCreateAttachDetachAPIView,
)
from awx.api.views.labels import LabelSubListCreateAttachDetachView
from awx.api.serializers import (
@@ -36,6 +39,7 @@ from awx.api.serializers import (
InstanceGroupSerializer,
InventoryUpdateEventSerializer,
JobTemplateSerializer,
LabelSerializer,
)
from awx.api.views.mixin import RelatedJobsPreventDeleteMixin
@@ -153,9 +157,28 @@ class InventoryJobTemplateList(SubListAPIView):
return qs.filter(inventory=parent)
class InventoryLabelList(LabelSubListCreateAttachDetachView):
class InventoryLabelList(DeleteLastUnattachLabelMixin, SubListCreateAttachDetachAPIView, SubListAPIView):
model = Label
serializer_class = LabelSerializer
parent_model = Inventory
relationship = 'labels'
def post(self, request, *args, **kwargs):
# If a label already exists in the database, attach it instead of erroring out
# that it already exists
if 'id' not in request.data and 'name' in request.data and 'organization' in request.data:
existing = Label.objects.filter(name=request.data['name'], organization_id=request.data['organization'])
if existing.exists():
existing = existing[0]
request.data['id'] = existing.id
del request.data['name']
del request.data['organization']
if Label.objects.filter(inventory_labels=self.kwargs['pk']).count() > 100:
return Response(
dict(msg=_('Maximum number of labels for {} reached.'.format(self.parent_model._meta.verbose_name_raw))), status=status.HTTP_400_BAD_REQUEST
)
return super(InventoryLabelList, self).post(request, *args, **kwargs)
class InventoryCopy(CopyAPIView):

View File

@@ -1,71 +0,0 @@
# AWX
from awx.api.generics import SubListCreateAttachDetachAPIView, RetrieveUpdateAPIView, ListCreateAPIView
from awx.main.models import Label
from awx.api.serializers import LabelSerializer
# Django
from django.utils.translation import gettext_lazy as _
# Django REST Framework
from rest_framework.response import Response
from rest_framework.status import HTTP_400_BAD_REQUEST
class LabelSubListCreateAttachDetachView(SubListCreateAttachDetachAPIView):
    """
    For related labels lists like /api/v2/inventories/N/labels/

    We want the last instance to be deleted from the database
    when the last disassociate happens.

    Subclasses need to define parent_model
    """

    model = Label
    serializer_class = LabelSerializer
    relationship = 'labels'

    def unattach(self, request, *args, **kwargs):
        # A non-empty validation result is handed back to the caller unchanged.
        (sub_id, res) = super().unattach_validate(request)
        if res:
            return res

        res = super().unattach_by_id(request, sub_id)

        # After the disassociation, drop the label itself if it reports being detached.
        obj = self.model.objects.get(id=sub_id)
        if obj.is_detached():
            obj.delete()

        return res

    def post(self, request, *args, **kwargs):
        # If a label already exists in the database, attach it instead of erroring out
        # that it already exists
        if 'id' not in request.data and 'name' in request.data and 'organization' in request.data:
            # .first() fetches the match (or None) in a single query.
            existing = Label.objects.filter(name=request.data['name'], organization_id=request.data['organization']).first()
            if existing is not None:
                request.data['id'] = existing.id
                del request.data['name']
                del request.data['organization']

        # Give a 400 error if we have attached too many labels to this object
        label_filter = self.parent_model._meta.get_field(self.relationship).remote_field.name
        if Label.objects.filter(**{label_filter: self.kwargs['pk']}).count() > 100:
            # Translate the constant template first and interpolate afterwards;
            # interpolating before the lookup would produce a message id that
            # can never match a catalog entry.
            msg = _('Maximum number of labels for {} reached.').format(self.parent_model._meta.verbose_name_raw)
            return Response(dict(msg=msg), status=HTTP_400_BAD_REQUEST)

        return super().post(request, *args, **kwargs)
class LabelDetail(RetrieveUpdateAPIView):
    # Detail view for a single Label, built on RetrieveUpdateAPIView.
    serializer_class = LabelSerializer
    model = Label
class LabelList(ListCreateAPIView):
    # Collection view for Label objects, built on ListCreateAPIView.
    serializer_class = LabelSerializer
    model = Label
    # Translatable display name of the view.
    name = _("Labels")

View File

@@ -80,7 +80,7 @@ def _ctit_db_wrapper(trans_safe=False):
yield
except DBError as exc:
if trans_safe:
level = logger.warning
level = logger.exception
if isinstance(exc, ProgrammingError):
if 'relation' in str(exc) and 'does not exist' in str(exc):
# this generally means we can't fetch Tower configuration
@@ -89,7 +89,7 @@ def _ctit_db_wrapper(trans_safe=False):
# has come up *before* the database has finished migrating, and
# especially that the conf.settings table doesn't exist yet
level = logger.debug
level(f'Database settings are not available, using defaults. error: {str(exc)}')
level('Database settings are not available, using defaults.')
else:
logger.exception('Error modifying something related to database settings.')
finally:

View File

@@ -12,7 +12,7 @@ from django.conf import settings
from django.db.models import Q, Prefetch
from django.contrib.auth.models import User
from django.utils.translation import gettext_lazy as _
from django.core.exceptions import ObjectDoesNotExist, FieldDoesNotExist
from django.core.exceptions import ObjectDoesNotExist
# Django REST Framework
from rest_framework.exceptions import ParseError, PermissionDenied
@@ -281,23 +281,13 @@ class BaseAccess(object):
"""
return True
def assure_relationship_exists(self, obj, relationship):
    """Raise NotImplementedError when ``relationship`` is not a field of ``obj``'s model.

    Dotted relationship names are accepted without any check.
    """
    is_simple_name = '.' not in relationship
    if is_simple_name:
        model_meta = obj._meta
        try:
            model_meta.get_field(relationship)
        except FieldDoesNotExist:
            raise NotImplementedError(f'The relationship {relationship} does not exist for model {type(obj)}')
    # not attempting validation for complex relationships now
def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
self.assure_relationship_exists(obj, relationship)
if skip_sub_obj_read_check:
return self.can_change(obj, None)
else:
return bool(self.can_change(obj, None) and self.user.can_access(type(sub_obj), 'read', sub_obj))
def can_unattach(self, obj, sub_obj, relationship, data=None):
self.assure_relationship_exists(obj, relationship)
return self.can_change(obj, data)
def check_related(self, field, Model, data, role_field='admin_role', obj=None, mandatory=False):
@@ -338,8 +328,6 @@ class BaseAccess(object):
role = getattr(resource, role_field, None)
if role is None:
# Handle special case where resource does not have direct roles
if role_field == 'read_role':
return self.user.can_access(type(resource), 'read', resource)
access_method_type = {'admin_role': 'change', 'execute_role': 'start'}[role_field]
return self.user.can_access(type(resource), access_method_type, resource, None)
return self.user in role
@@ -511,21 +499,6 @@ class BaseAccess(object):
return False
class UnifiedCredentialsMixin(BaseAccess):
    """
    Shared attach rule for the standard ``credentials`` many-to-many used by JTs, jobs, and others.
    Attaching requires change permission on the parent plus use_role on the credential;
    every other relationship is handled by the parent class.
    """

    @check_superuser
    def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
        # Anything that is not the credentials relationship follows the inherited rules.
        if relationship != 'credentials':
            return super().can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)

        if not isinstance(sub_obj, Credential):
            raise RuntimeError(f'Can only attach credentials to credentials relationship, got {type(sub_obj)}')

        # Same short-circuit as an ``and`` chain: a falsy change check is returned as-is.
        may_change = self.can_change(obj, None)
        if not may_change:
            return may_change
        return self.user in sub_obj.use_role
class NotificationAttachMixin(BaseAccess):
"""For models that can have notifications attached
@@ -579,8 +552,7 @@ class InstanceAccess(BaseAccess):
return super(InstanceAccess, self).can_unattach(obj, sub_obj, relationship, relationship, data=data)
def can_add(self, data):
return self.user.is_superuser
return False
def can_change(self, obj, data):
return False
@@ -1059,7 +1031,7 @@ class GroupAccess(BaseAccess):
return bool(obj and self.user in obj.inventory.admin_role)
class InventorySourceAccess(NotificationAttachMixin, UnifiedCredentialsMixin, BaseAccess):
class InventorySourceAccess(NotificationAttachMixin, BaseAccess):
"""
I can see inventory sources whenever I can see their inventory.
I can change inventory sources whenever I can change their inventory.
@@ -1103,6 +1075,18 @@ class InventorySourceAccess(NotificationAttachMixin, UnifiedCredentialsMixin, Ba
return self.user in obj.inventory.update_role
return False
@check_superuser
def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
    """Decide whether ``sub_obj`` may be attached to the inventory source ``obj``.

    For credentials, the user must be in the admin_role of the source's
    inventory and in the credential's use_role; other relationships are
    delegated to the parent class.
    """
    is_credential_attach = isinstance(sub_obj, Credential) if relationship == 'credentials' else False
    if not is_credential_attach:
        return super(InventorySourceAccess, self).can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
    # The ``and`` chain deliberately yields the first falsy operand (e.g. a missing inventory).
    return obj and obj.inventory and self.user in obj.inventory.admin_role and self.user in sub_obj.use_role
@check_superuser
def can_unattach(self, obj, sub_obj, relationship, *args, **kwargs):
    """Decide whether ``sub_obj`` may be detached from the inventory source ``obj``.

    For credentials, the user must be in the admin_role of the source's
    inventory; no role on the credential itself is required.  Every other
    relationship is delegated to the parent class's unattach check.
    """
    if relationship == 'credentials' and isinstance(sub_obj, Credential):
        return obj and obj.inventory and self.user in obj.inventory.admin_role
    # Delegate to the parent's *unattach* rule: falling back to can_attach would
    # apply attach requirements to a removal.
    return super(InventorySourceAccess, self).can_unattach(obj, sub_obj, relationship, *args, **kwargs)
class InventoryUpdateAccess(BaseAccess):
"""
@@ -1501,7 +1485,7 @@ class ProjectUpdateAccess(BaseAccess):
return obj and self.user in obj.project.admin_role
class JobTemplateAccess(NotificationAttachMixin, UnifiedCredentialsMixin, BaseAccess):
class JobTemplateAccess(NotificationAttachMixin, BaseAccess):
"""
I can see job templates when:
- I have read role for the job template.
@@ -1565,7 +1549,8 @@ class JobTemplateAccess(NotificationAttachMixin, UnifiedCredentialsMixin, BaseAc
if self.user not in inventory.use_role:
return False
if not self.check_related('execution_environment', ExecutionEnvironment, data, role_field='read_role'):
ee = get_value(ExecutionEnvironment, 'execution_environment')
if ee and not self.user.can_access(ExecutionEnvironment, 'read', ee):
return False
project = get_value(Project, 'project')
@@ -1615,8 +1600,10 @@ class JobTemplateAccess(NotificationAttachMixin, UnifiedCredentialsMixin, BaseAc
if self.changes_are_non_sensitive(obj, data):
return True
if not self.check_related('execution_environment', ExecutionEnvironment, data, obj=obj, role_field='read_role'):
return False
if data.get('execution_environment'):
ee = get_object_from_data('execution_environment', ExecutionEnvironment, data)
if not self.user.can_access(ExecutionEnvironment, 'read', ee):
return False
for required_field, cls in (('inventory', Inventory), ('project', Project)):
is_mandatory = True
@@ -1680,13 +1667,17 @@ class JobTemplateAccess(NotificationAttachMixin, UnifiedCredentialsMixin, BaseAc
if not obj.organization:
return False
return self.user.can_access(type(sub_obj), "read", sub_obj) and self.user in obj.organization.admin_role
if relationship == 'credentials' and isinstance(sub_obj, Credential):
return self.user in obj.admin_role and self.user in sub_obj.use_role
return super(JobTemplateAccess, self).can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
@check_superuser
def can_unattach(self, obj, sub_obj, relationship, *args, **kwargs):
if relationship == "instance_groups":
return self.can_attach(obj, sub_obj, relationship, *args, **kwargs)
return super(JobTemplateAccess, self).can_unattach(obj, sub_obj, relationship, *args, **kwargs)
if relationship == 'credentials' and isinstance(sub_obj, Credential):
return self.user in obj.admin_role
return super(JobTemplateAccess, self).can_attach(obj, sub_obj, relationship, *args, **kwargs)
class JobAccess(BaseAccess):
@@ -1833,7 +1824,7 @@ class SystemJobAccess(BaseAccess):
return False # no relaunching of system jobs
class JobLaunchConfigAccess(UnifiedCredentialsMixin, BaseAccess):
class JobLaunchConfigAccess(BaseAccess):
"""
Launch configs must have permissions checked for
- relaunching
@@ -1841,69 +1832,63 @@ class JobLaunchConfigAccess(UnifiedCredentialsMixin, BaseAccess):
In order to create a new object with a copy of this launch config, I need:
- use access to related inventory (if present)
- read access to Execution Environment (if present), unless the specified ee is already in the template
- use role to many-related credentials (if any present)
- read access to many-related labels (if any present), unless the specified label is already in the template
- read access to many-related instance groups (if any present), unless the specified instance group is already in the template
"""
model = JobLaunchConfig
select_related = 'job'
prefetch_related = ('credentials', 'inventory')
M2M_CHECKS = {'credentials': Credential, 'labels': Label, 'instance_groups': InstanceGroup}
def _unusable_creds_exist(self, qs):
return qs.exclude(pk__in=Credential._accessible_pk_qs(Credential, self.user, 'use_role')).exists()
def _related_filtered_queryset(self, cls):
if cls is Label:
return LabelAccess(self.user).filtered_queryset()
elif cls is InstanceGroup:
return InstanceGroupAccess(self.user).filtered_queryset()
else:
return cls._accessible_pk_qs(cls, self.user, 'use_role')
def has_obj_m2m_access(self, obj):
for relationship, cls in self.M2M_CHECKS.items():
if getattr(obj, relationship).exclude(pk__in=self._related_filtered_queryset(cls)).exists():
return False
return True
def has_credentials_access(self, obj):
# user has access if no related credentials exist that the user lacks use role for
return not self._unusable_creds_exist(obj.credentials)
@check_superuser
def can_add(self, data, template=None):
# This is a special case, we don't check related many-to-many elsewhere
# launch RBAC checks use this
if 'reference_obj' in data:
if not self.has_obj_m2m_access(data['reference_obj']):
if 'credentials' in data and data['credentials'] or 'reference_obj' in data:
if 'reference_obj' in data:
prompted_cred_qs = data['reference_obj'].credentials.all()
else:
# If given model objects, only use the primary key from them
cred_pks = [cred.pk for cred in data['credentials']]
if template:
for cred in template.credentials.all():
if cred.pk in cred_pks:
cred_pks.remove(cred.pk)
prompted_cred_qs = Credential.objects.filter(pk__in=cred_pks)
if self._unusable_creds_exist(prompted_cred_qs):
return False
else:
for relationship, cls in self.M2M_CHECKS.items():
if relationship in data and data[relationship]:
# If given model objects, only use the primary key from them
sub_obj_pks = [sub_obj.pk for sub_obj in data[relationship]]
if template:
for sub_obj in getattr(template, relationship).all():
if sub_obj.pk in sub_obj_pks:
sub_obj_pks.remove(sub_obj.pk)
if cls.objects.filter(pk__in=sub_obj_pks).exclude(pk__in=self._related_filtered_queryset(cls)).exists():
return False
return self.check_related('inventory', Inventory, data, role_field='use_role') and self.check_related(
'execution_environment', ExecutionEnvironment, data, role_field='read_role'
)
return self.check_related('inventory', Inventory, data, role_field='use_role')
@check_superuser
def can_use(self, obj):
return (
self.has_obj_m2m_access(obj)
and self.check_related('inventory', Inventory, {}, obj=obj, role_field='use_role', mandatory=True)
and self.check_related('execution_environment', ExecutionEnvironment, {}, obj=obj, role_field='read_role')
)
return self.check_related('inventory', Inventory, {}, obj=obj, role_field='use_role', mandatory=True) and self.has_credentials_access(obj)
def can_change(self, obj, data):
return self.check_related('inventory', Inventory, data, obj=obj, role_field='use_role') and self.check_related(
'execution_environment', ExecutionEnvironment, data, obj=obj, role_field='read_role'
)
return self.check_related('inventory', Inventory, data, obj=obj, role_field='use_role')
def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
if isinstance(sub_obj, Credential) and relationship == 'credentials':
return self.user in sub_obj.use_role
else:
raise NotImplementedError('Only credentials can be attached to launch configurations.')
def can_unattach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
if isinstance(sub_obj, Credential) and relationship == 'credentials':
if skip_sub_obj_read_check:
return True
else:
return self.user in sub_obj.read_role
else:
raise NotImplementedError('Only credentials can be attached to launch configurations.')
class WorkflowJobTemplateNodeAccess(UnifiedCredentialsMixin, BaseAccess):
class WorkflowJobTemplateNodeAccess(BaseAccess):
"""
I can see/use a WorkflowJobTemplateNode if I have read permission
to associated Workflow Job Template
@@ -1926,7 +1911,7 @@ class WorkflowJobTemplateNodeAccess(UnifiedCredentialsMixin, BaseAccess):
"""
model = WorkflowJobTemplateNode
prefetch_related = ('success_nodes', 'failure_nodes', 'always_nodes', 'unified_job_template', 'workflow_job_template')
prefetch_related = ('success_nodes', 'failure_nodes', 'always_nodes', 'unified_job_template', 'credentials', 'workflow_job_template')
def filtered_queryset(self):
return self.model.objects.filter(workflow_job_template__in=WorkflowJobTemplate.accessible_objects(self.user, 'read_role'))
@@ -1938,8 +1923,7 @@ class WorkflowJobTemplateNodeAccess(UnifiedCredentialsMixin, BaseAccess):
return (
self.check_related('workflow_job_template', WorkflowJobTemplate, data, mandatory=True)
and self.check_related('unified_job_template', UnifiedJobTemplate, data, role_field='execute_role')
and self.check_related('inventory', Inventory, data, role_field='use_role')
and self.check_related('execution_environment', ExecutionEnvironment, data, role_field='read_role')
and JobLaunchConfigAccess(self.user).can_add(data)
)
def wfjt_admin(self, obj):
@@ -1948,14 +1932,17 @@ class WorkflowJobTemplateNodeAccess(UnifiedCredentialsMixin, BaseAccess):
else:
return self.user in obj.workflow_job_template.admin_role
def ujt_execute(self, obj, data=None):
def ujt_execute(self, obj):
if not obj.unified_job_template:
return True
return self.check_related('unified_job_template', UnifiedJobTemplate, data, obj=obj, role_field='execute_role', mandatory=True)
return self.check_related('unified_job_template', UnifiedJobTemplate, {}, obj=obj, role_field='execute_role', mandatory=True)
def can_change(self, obj, data):
if not data:
return True
# should not be able to edit the prompts if lacking access to UJT or WFJT
return self.ujt_execute(obj, data=data) and self.wfjt_admin(obj) and JobLaunchConfigAccess(self.user).can_change(obj, data)
return self.ujt_execute(obj) and self.wfjt_admin(obj) and JobLaunchConfigAccess(self.user).can_change(obj, data)
def can_delete(self, obj):
return self.wfjt_admin(obj)
@@ -1968,14 +1955,29 @@ class WorkflowJobTemplateNodeAccess(UnifiedCredentialsMixin, BaseAccess):
return True
def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
if relationship in ('success_nodes', 'failure_nodes', 'always_nodes'):
return self.wfjt_admin(obj) and self.check_same_WFJT(obj, sub_obj)
return super().can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
if not self.wfjt_admin(obj):
return False
if relationship == 'credentials':
# Need permission to related template to attach a credential
if not self.ujt_execute(obj):
return False
return JobLaunchConfigAccess(self.user).can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
elif relationship in ('success_nodes', 'failure_nodes', 'always_nodes'):
return self.check_same_WFJT(obj, sub_obj)
else:
raise NotImplementedError('Relationship {} not understood for WFJT nodes.'.format(relationship))
def can_unattach(self, obj, sub_obj, relationship, data=None):
if relationship in ('success_nodes', 'failure_nodes', 'always_nodes'):
return self.wfjt_admin(obj)
return super().can_unattach(obj, sub_obj, relationship, data=None)
def can_unattach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
if not self.wfjt_admin(obj):
return False
if relationship == 'credentials':
if not self.ujt_execute(obj):
return False
return JobLaunchConfigAccess(self.user).can_unattach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
elif relationship in ('success_nodes', 'failure_nodes', 'always_nodes'):
return self.check_same_WFJT(obj, sub_obj)
else:
raise NotImplementedError('Relationship {} not understood for WFJT nodes.'.format(relationship))
class WorkflowJobNodeAccess(BaseAccess):
@@ -2050,10 +2052,13 @@ class WorkflowJobTemplateAccess(NotificationAttachMixin, BaseAccess):
if not data: # So the browseable API will work
return Organization.accessible_objects(self.user, 'workflow_admin_role').exists()
return bool(
self.check_related('organization', Organization, data, role_field='workflow_admin_role', mandatory=True)
and self.check_related('inventory', Inventory, data, role_field='use_role')
and self.check_related('execution_environment', ExecutionEnvironment, data, role_field='read_role')
if data.get('execution_environment'):
ee = get_object_from_data('execution_environment', ExecutionEnvironment, data)
if not self.user.can_access(ExecutionEnvironment, 'read', ee):
return False
return self.check_related('organization', Organization, data, role_field='workflow_admin_role', mandatory=True) and self.check_related(
'inventory', Inventory, data, role_field='use_role'
)
def can_copy(self, obj):
@@ -2099,10 +2104,14 @@ class WorkflowJobTemplateAccess(NotificationAttachMixin, BaseAccess):
if self.user.is_superuser:
return True
if data and data.get('execution_environment'):
ee = get_object_from_data('execution_environment', ExecutionEnvironment, data)
if not self.user.can_access(ExecutionEnvironment, 'read', ee):
return False
return (
self.check_related('organization', Organization, data, role_field='workflow_admin_role', obj=obj)
and self.check_related('inventory', Inventory, data, role_field='use_role', obj=obj)
and self.check_related('execution_environment', ExecutionEnvironment, data, obj=obj, role_field='read_role')
and self.user in obj.admin_role
)
@@ -2509,7 +2518,7 @@ class UnifiedJobAccess(BaseAccess):
return super(UnifiedJobAccess, self).get_queryset().filter(workflowapproval__isnull=True)
class ScheduleAccess(UnifiedCredentialsMixin, BaseAccess):
class ScheduleAccess(BaseAccess):
"""
I can see a schedule if I can see it's related unified job, I can create them or update them if I have write access
"""
@@ -2550,6 +2559,12 @@ class ScheduleAccess(UnifiedCredentialsMixin, BaseAccess):
def can_delete(self, obj):
return self.can_change(obj, {})
def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
return JobLaunchConfigAccess(self.user).can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
def can_unattach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
return JobLaunchConfigAccess(self.user).can_unattach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
class NotificationTemplateAccess(BaseAccess):
"""

View File

@@ -16,7 +16,6 @@ from awx.conf.license import get_license
from awx.main.utils import get_awx_version, camelcase_to_underscore, datetime_hook
from awx.main import models
from awx.main.analytics import register
from awx.main.scheduler.task_manager_models import TaskManagerInstances
"""
This module is used to define metrics collected by awx.main.analytics.gather()
@@ -236,25 +235,25 @@ def projects_by_scm_type(since, **kwargs):
@register('instance_info', '1.2', description=_('Cluster topology and capacity'))
def instance_info(since, include_hostnames=False, **kwargs):
info = {}
# Use same method that the TaskManager does to compute consumed capacity without querying all running jobs for each Instance
active_tasks = models.UnifiedJob.objects.filter(status__in=['running', 'waiting']).only('task_impact', 'controller_node', 'execution_node')
tm_instances = TaskManagerInstances(active_tasks, instance_fields=['uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'enabled'])
for tm_instance in tm_instances.instances_by_hostname.values():
instance = tm_instance.obj
instances = models.Instance.objects.values_list('hostname').values(
'uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'hostname', 'enabled'
)
for instance in instances:
consumed_capacity = sum(x.task_impact for x in models.UnifiedJob.objects.filter(execution_node=instance['hostname'], status__in=('running', 'waiting')))
instance_info = {
'uuid': instance.uuid,
'version': instance.version,
'capacity': instance.capacity,
'cpu': instance.cpu,
'memory': instance.memory,
'managed_by_policy': instance.managed_by_policy,
'enabled': instance.enabled,
'consumed_capacity': tm_instance.consumed_capacity,
'remaining_capacity': instance.capacity - tm_instance.consumed_capacity,
'uuid': instance['uuid'],
'version': instance['version'],
'capacity': instance['capacity'],
'cpu': instance['cpu'],
'memory': instance['memory'],
'managed_by_policy': instance['managed_by_policy'],
'enabled': instance['enabled'],
'consumed_capacity': consumed_capacity,
'remaining_capacity': instance['capacity'] - consumed_capacity,
}
if include_hostnames is True:
instance_info['hostname'] = instance.hostname
info[instance.uuid] = instance_info
instance_info['hostname'] = instance['hostname']
info[instance['uuid']] = instance_info
return info

View File

@@ -3,7 +3,6 @@ from prometheus_client import CollectorRegistry, Gauge, Info, generate_latest
from awx.conf.license import get_license
from awx.main.utils import get_awx_version
from awx.main.models import UnifiedJob
from awx.main.analytics.collectors import (
counts,
instance_info,
@@ -170,9 +169,8 @@ def metrics():
all_job_data = job_counts(None)
statuses = all_job_data.get('status', {})
states = set(dict(UnifiedJob.STATUS_CHOICES).keys()) - set(['new'])
for state in states:
STATUS.labels(status=state).set(statuses.get(state, 0))
for status, value in statuses.items():
STATUS.labels(status=status).set(value)
RUNNING_JOBS.set(current_counts['running_jobs'])
PENDING_JOBS.set(current_counts['pending_jobs'])

View File

@@ -166,11 +166,7 @@ class Metrics:
elif settings.IS_TESTING():
self.instance_name = "awx_testing"
else:
try:
self.instance_name = Instance.objects.me().hostname
except Exception as e:
self.instance_name = settings.CLUSTER_HOST_ID
logger.info(f'Instance {self.instance_name} seems to be unregistered, error: {e}')
self.instance_name = Instance.objects.me().hostname
# metric name, help_text
METRICSLIST = [
@@ -188,29 +184,19 @@ class Metrics:
FloatM('subsystem_metrics_pipe_execute_seconds', 'Time spent saving metrics to redis'),
IntM('subsystem_metrics_pipe_execute_calls', 'Number of calls to pipe_execute'),
FloatM('subsystem_metrics_send_metrics_seconds', 'Time spent sending metrics to other nodes'),
SetFloatM('task_manager_get_tasks_seconds', 'Time spent in loading tasks from db'),
SetFloatM('task_manager_get_tasks_seconds', 'Time spent in loading all tasks from db'),
SetFloatM('task_manager_start_task_seconds', 'Time spent starting task'),
SetFloatM('task_manager_process_running_tasks_seconds', 'Time spent processing running tasks'),
SetFloatM('task_manager_process_pending_tasks_seconds', 'Time spent processing pending tasks'),
SetFloatM('task_manager_generate_dependencies_seconds', 'Time spent generating dependencies for pending tasks'),
SetFloatM('task_manager_spawn_workflow_graph_jobs_seconds', 'Time spent spawning workflow jobs'),
SetFloatM('task_manager__schedule_seconds', 'Time spent in running the entire _schedule'),
IntM('task_manager__schedule_calls', 'Number of calls to _schedule, after lock is acquired'),
IntM('task_manager_schedule_calls', 'Number of calls to task manager schedule'),
SetFloatM('task_manager_recorded_timestamp', 'Unix timestamp when metrics were last recorded'),
SetIntM('task_manager_tasks_started', 'Number of tasks started'),
SetIntM('task_manager_running_processed', 'Number of running tasks processed'),
SetIntM('task_manager_pending_processed', 'Number of pending tasks processed'),
SetIntM('task_manager_tasks_blocked', 'Number of tasks blocked from running'),
SetFloatM('task_manager_commit_seconds', 'Time spent in db transaction, including on_commit calls'),
SetFloatM('dependency_manager_get_tasks_seconds', 'Time spent loading pending tasks from db'),
SetFloatM('dependency_manager_generate_dependencies_seconds', 'Time spent generating dependencies for pending tasks'),
SetFloatM('dependency_manager__schedule_seconds', 'Time spent in running the entire _schedule'),
IntM('dependency_manager__schedule_calls', 'Number of calls to _schedule, after lock is acquired'),
SetFloatM('dependency_manager_recorded_timestamp', 'Unix timestamp when metrics were last recorded'),
SetIntM('dependency_manager_pending_processed', 'Number of pending tasks processed'),
SetFloatM('workflow_manager__schedule_seconds', 'Time spent in running the entire _schedule'),
IntM('workflow_manager__schedule_calls', 'Number of calls to _schedule, after lock is acquired'),
SetFloatM('workflow_manager_recorded_timestamp', 'Unix timestamp when metrics were last recorded'),
SetFloatM('workflow_manager_spawn_workflow_graph_jobs_seconds', 'Time spent spawning workflow tasks'),
SetFloatM('workflow_manager_get_tasks_seconds', 'Time spent loading workflow tasks from db'),
]
# turn metric list into dictionary with the metric name as a key
self.METRICS = {}
@@ -317,12 +303,7 @@ class Metrics:
self.previous_send_metrics.set(current_time)
self.previous_send_metrics.store_value(self.conn)
finally:
try:
lock.release()
except Exception as exc:
# After system failures, we might throw redis.exceptions.LockNotOwnedError
# this is to avoid print a Traceback, and importantly, avoid raising an exception into parent context
logger.warning(f'Error releasing subsystem metrics redis lock, error: {str(exc)}')
lock.release()
def load_other_metrics(self, request):
# data received from other nodes are stored in their own keys

View File

@@ -446,7 +446,7 @@ register(
label=_('Default Job Idle Timeout'),
help_text=_(
'If no output is detected from ansible in this number of seconds the execution will be terminated. '
'Use value of 0 to indicate that no idle timeout should be imposed.'
'Use value of 0 to used default idle_timeout is 600s.'
),
category=_('Jobs'),
category_slug='jobs',

View File

@@ -4,7 +4,6 @@ import select
from contextlib import contextmanager
from django.conf import settings
from django.db import connection as pg_connection
NOT_READY = ([], [], [])
@@ -16,6 +15,7 @@ def get_local_queuename():
class PubSub(object):
def __init__(self, conn):
assert conn.autocommit, "Connection must be in autocommit mode."
self.conn = conn
def listen(self, channel):
@@ -31,9 +31,6 @@ class PubSub(object):
cur.execute('SELECT pg_notify(%s, %s);', (channel, payload))
def events(self, select_timeout=5, yield_timeouts=False):
if not self.conn.autocommit:
raise RuntimeError('Listening for events can only be done in autocommit mode')
while True:
if select.select([self.conn], [], [], select_timeout) == NOT_READY:
if yield_timeouts:
@@ -48,32 +45,11 @@ class PubSub(object):
@contextmanager
def pg_bus_conn(new_connection=False):
'''
Any listeners probably want to establish a new database connection,
separate from the Django connection used for queries, because that will prevent
losing connection to the channel whenever a .close() happens.
Any publishers probably want to use the existing connection
so that messages follow postgres transaction rules
https://www.postgresql.org/docs/current/sql-notify.html
'''
if new_connection:
conf = settings.DATABASES['default']
conn = psycopg2.connect(
dbname=conf['NAME'], host=conf['HOST'], user=conf['USER'], password=conf['PASSWORD'], port=conf['PORT'], **conf.get("OPTIONS", {})
)
# Django connection.cursor().connection doesn't have autocommit=True on by default
conn.set_session(autocommit=True)
else:
if pg_connection.connection is None:
pg_connection.connect()
if pg_connection.connection is None:
raise RuntimeError('Unexpectedly could not connect to postgres for pg_notify actions')
conn = pg_connection.connection
def pg_bus_conn():
conf = settings.DATABASES['default']
conn = psycopg2.connect(dbname=conf['NAME'], host=conf['HOST'], user=conf['USER'], password=conf['PASSWORD'], port=conf['PORT'], **conf.get("OPTIONS", {}))
# Django connection.cursor().connection doesn't have autocommit=True on
conn.set_session(autocommit=True)
pubsub = PubSub(conn)
yield pubsub
if new_connection:
conn.close()
conn.close()

View File

@@ -37,24 +37,18 @@ class Control(object):
def running(self, *args, **kwargs):
return self.control_with_reply('running', *args, **kwargs)
def cancel(self, task_ids, *args, **kwargs):
return self.control_with_reply('cancel', *args, extra_data={'task_ids': task_ids}, **kwargs)
@classmethod
def generate_reply_queue_name(cls):
return f"reply_to_{str(uuid.uuid4()).replace('-','_')}"
def control_with_reply(self, command, timeout=5, extra_data=None):
def control_with_reply(self, command, timeout=5):
logger.warning('checking {} {} for {}'.format(self.service, command, self.queuename))
reply_queue = Control.generate_reply_queue_name()
self.result = None
with pg_bus_conn(new_connection=True) as conn:
with pg_bus_conn() as conn:
conn.listen(reply_queue)
send_data = {'control': command, 'reply_to': reply_queue}
if extra_data:
send_data.update(extra_data)
conn.notify(self.queuename, json.dumps(send_data))
conn.notify(self.queuename, json.dumps({'control': command, 'reply_to': reply_queue}))
for reply in conn.events(select_timeout=timeout, yield_timeouts=True):
if reply is None:

View File

@@ -16,14 +16,13 @@ from queue import Full as QueueFull, Empty as QueueEmpty
from django.conf import settings
from django.db import connection as django_connection, connections
from django.core.cache import cache as django_cache
from django.utils.timezone import now as tz_now
from django_guid import set_guid
from jinja2 import Template
import psutil
from awx.main.models import UnifiedJob
from awx.main.dispatch import reaper
from awx.main.utils.common import convert_mem_str_to_bytes, get_mem_effective_capacity, log_excess_runtime
from awx.main.utils.common import convert_mem_str_to_bytes, get_mem_effective_capacity
if 'run_callback_receiver' in sys.argv:
logger = logging.getLogger('awx.main.commands.run_callback_receiver')
@@ -329,16 +328,12 @@ class AutoscalePool(WorkerPool):
# Get same number as max forks based on memory, this function takes memory as bytes
self.max_workers = get_mem_effective_capacity(total_memory_gb * 2**30)
# add magic prime number of extra workers to ensure
# we have a few extra workers to run the heartbeat
self.max_workers += 7
# max workers can't be less than min_workers
self.max_workers = max(self.min_workers, self.max_workers)
# the task manager enforces settings.TASK_MANAGER_TIMEOUT on its own
# but if the task takes longer than the time defined here, we will force it to stop here
self.task_manager_timeout = settings.TASK_MANAGER_TIMEOUT + settings.TASK_MANAGER_TIMEOUT_GRACE_PERIOD
def debug(self, *args, **kwargs):
self.cleanup()
return super(AutoscalePool, self).debug(*args, **kwargs)
@property
def should_grow(self):
@@ -356,7 +351,6 @@ class AutoscalePool(WorkerPool):
def debug_meta(self):
return 'min={} max={}'.format(self.min_workers, self.max_workers)
@log_excess_runtime(logger)
def cleanup(self):
"""
Perform some internal account and cleanup. This is run on
@@ -365,6 +359,8 @@ class AutoscalePool(WorkerPool):
1. Discover worker processes that exited, and recover messages they
were handling.
2. Clean up unnecessary, idle workers.
3. Check to see if the database says this node is running any tasks
that aren't actually running. If so, reap them.
IMPORTANT: this function is one of the few places in the dispatcher
(aside from setting lookups) where we talk to the database. As such,
@@ -405,15 +401,13 @@ class AutoscalePool(WorkerPool):
# the task manager to never do more work
current_task = w.current_task
if current_task and isinstance(current_task, dict):
endings = ['tasks.task_manager', 'tasks.dependency_manager', 'tasks.workflow_manager']
current_task_name = current_task.get('task', '')
if any(current_task_name.endswith(e) for e in endings):
if current_task.get('task', '').endswith('tasks.run_task_manager'):
if 'started' not in current_task:
w.managed_tasks[current_task['uuid']]['started'] = time.time()
age = time.time() - current_task['started']
w.managed_tasks[current_task['uuid']]['age'] = age
if age > self.task_manager_timeout:
logger.error(f'{current_task_name} has held the advisory lock for {age}, sending SIGTERM to {w.pid}')
if age > (60 * 5):
logger.error(f'run_task_manager has held the advisory lock for >5m, sending SIGTERM to {w.pid}') # noqa
os.kill(w.pid, signal.SIGTERM)
for m in orphaned:
@@ -423,17 +417,13 @@ class AutoscalePool(WorkerPool):
idx = random.choice(range(len(self.workers)))
self.write(idx, m)
def add_bind_kwargs(self, body):
bind_kwargs = body.pop('bind_kwargs', [])
body.setdefault('kwargs', {})
if 'dispatch_time' in bind_kwargs:
body['kwargs']['dispatch_time'] = tz_now().isoformat()
if 'worker_tasks' in bind_kwargs:
worker_tasks = {}
for worker in self.workers:
worker.calculate_managed_tasks()
worker_tasks[worker.pid] = list(worker.managed_tasks.keys())
body['kwargs']['worker_tasks'] = worker_tasks
# if the database says a job is running on this node, but it's *not*,
# then reap it
running_uuids = []
for worker in self.workers:
worker.calculate_managed_tasks()
running_uuids.extend(list(worker.managed_tasks.keys()))
reaper.reap(excluded_uuids=running_uuids)
def up(self):
if self.full:
@@ -448,8 +438,6 @@ class AutoscalePool(WorkerPool):
if 'guid' in body:
set_guid(body['guid'])
try:
if isinstance(body, dict) and body.get('bind_kwargs'):
self.add_bind_kwargs(body)
# when the cluster heartbeat occurs, clean up internally
if isinstance(body, dict) and 'cluster_node_heartbeat' in body['task']:
self.cleanup()
@@ -464,10 +452,6 @@ class AutoscalePool(WorkerPool):
w.put(body)
break
else:
task_name = 'unknown'
if isinstance(body, dict):
task_name = body.get('task')
logger.warn(f'Workers maxed, queuing {task_name}, load: {sum(len(w.managed_tasks) for w in self.workers)} / {len(self.workers)}')
return super(AutoscalePool, self).write(preferred_queue, body)
except Exception:
for conn in connections.all():

View File

@@ -2,7 +2,6 @@ import inspect
import logging
import sys
import json
import time
from uuid import uuid4
from django.conf import settings
@@ -50,21 +49,13 @@ class task:
@task(queue='tower_broadcast')
def announce():
print("Run this everywhere!")
# The special parameter bind_kwargs tells the main dispatcher process to add certain kwargs
@task(bind_kwargs=['dispatch_time'])
def print_time(dispatch_time=None):
print(f"Time I was dispatched: {dispatch_time}")
"""
def __init__(self, queue=None, bind_kwargs=None):
def __init__(self, queue=None):
self.queue = queue
self.bind_kwargs = bind_kwargs
def __call__(self, fn=None):
queue = self.queue
bind_kwargs = self.bind_kwargs
class PublisherMixin(object):
@@ -84,12 +75,10 @@ class task:
msg = f'{cls.name}: Queue value required and may not be None'
logger.error(msg)
raise ValueError(msg)
obj = {'uuid': task_id, 'args': args, 'kwargs': kwargs, 'task': cls.name, 'time_pub': time.time()}
obj = {'uuid': task_id, 'args': args, 'kwargs': kwargs, 'task': cls.name}
guid = get_guid()
if guid:
obj['guid'] = guid
if bind_kwargs:
obj['bind_kwargs'] = bind_kwargs
obj.update(**kw)
if callable(queue):
queue = queue()

View File

@@ -2,7 +2,6 @@ from datetime import timedelta
import logging
from django.db.models import Q
from django.conf import settings
from django.utils.timezone import now as tz_now
from django.contrib.contenttypes.models import ContentType
@@ -16,71 +15,44 @@ def startup_reaping():
If this particular instance is starting, then we know that any running jobs are invalid
so we will reap those jobs as a special action here
"""
try:
me = Instance.objects.me()
except RuntimeError as e:
logger.warning(f'Local instance is not registered, not running startup reaper: {e}')
return
me = Instance.objects.me()
jobs = UnifiedJob.objects.filter(status='running', controller_node=me.hostname)
job_ids = []
for j in jobs:
job_ids.append(j.id)
reap_job(
j,
'failed',
job_explanation='Task was marked as running at system start up. The system must have not shut down properly, so it has been marked as failed.',
)
j.status = 'failed'
j.start_args = ''
j.job_explanation += 'Task was marked as running at system start up. The system must have not shut down properly, so it has been marked as failed.'
j.save(update_fields=['status', 'start_args', 'job_explanation'])
if hasattr(j, 'send_notification_templates'):
j.send_notification_templates('failed')
j.websocket_emit_status('failed')
if job_ids:
logger.error(f'Unified jobs {job_ids} were reaped on dispatch startup')
def reap_job(j, status, job_explanation=None):
j.refresh_from_db(fields=['status', 'job_explanation'])
status_before = j.status
if status_before not in ('running', 'waiting'):
def reap_job(j, status):
if UnifiedJob.objects.get(id=j.id).status not in ('running', 'waiting'):
# just in case, don't reap jobs that aren't running
return
j.status = status
j.start_args = '' # blank field to remove encrypted passwords
if j.job_explanation:
j.job_explanation += ' ' # Separate messages for readability
if job_explanation is None:
j.job_explanation += 'Task was marked as running but was not present in the job queue, so it has been marked as failed.'
else:
j.job_explanation += job_explanation
j.job_explanation += ' '.join(
(
'Task was marked as running but was not present in',
'the job queue, so it has been marked as failed.',
)
)
j.save(update_fields=['status', 'start_args', 'job_explanation'])
if hasattr(j, 'send_notification_templates'):
j.send_notification_templates('failed')
j.websocket_emit_status(status)
logger.error(f'{j.log_format} is no longer {status_before}; reaping')
logger.error('{} is no longer running; reaping'.format(j.log_format))
def reap_waiting(instance=None, status='failed', job_explanation=None, grace_period=None, excluded_uuids=None, ref_time=None):
def reap(instance=None, status='failed', excluded_uuids=[]):
"""
Reap all jobs in waiting for this instance.
"""
if grace_period is None:
grace_period = settings.JOB_WAITING_GRACE_PERIOD + settings.TASK_MANAGER_TIMEOUT
me = instance
if me is None:
try:
me = Instance.objects.me()
except RuntimeError as e:
logger.warning(f'Local instance is not registered, not running reaper: {e}')
return
if ref_time is None:
ref_time = tz_now()
jobs = UnifiedJob.objects.filter(status='waiting', modified__lte=ref_time - timedelta(seconds=grace_period), controller_node=me.hostname)
if excluded_uuids:
jobs = jobs.exclude(celery_task_id__in=excluded_uuids)
for j in jobs:
reap_job(j, status, job_explanation=job_explanation)
def reap(instance=None, status='failed', job_explanation=None, excluded_uuids=None):
"""
Reap all jobs in running for this instance.
Reap all jobs in waiting|running for this instance.
"""
me = instance
if me is None:
@@ -89,11 +61,12 @@ def reap(instance=None, status='failed', job_explanation=None, excluded_uuids=No
except RuntimeError as e:
logger.warning(f'Local instance is not registered, not running reaper: {e}')
return
now = tz_now()
workflow_ctype_id = ContentType.objects.get_for_model(WorkflowJob).id
jobs = UnifiedJob.objects.filter(
Q(status='running') & (Q(execution_node=me.hostname) | Q(controller_node=me.hostname)) & ~Q(polymorphic_ctype_id=workflow_ctype_id)
)
if excluded_uuids:
jobs = jobs.exclude(celery_task_id__in=excluded_uuids)
(Q(status='running') | Q(status='waiting', modified__lte=now - timedelta(seconds=60)))
& (Q(execution_node=me.hostname) | Q(controller_node=me.hostname))
& ~Q(polymorphic_ctype_id=workflow_ctype_id)
).exclude(celery_task_id__in=excluded_uuids)
for j in jobs:
reap_job(j, status, job_explanation=job_explanation)
reap_job(j, status)

View File

@@ -17,7 +17,6 @@ from django.conf import settings
from awx.main.dispatch.pool import WorkerPool
from awx.main.dispatch import pg_bus_conn
from awx.main.utils.common import log_excess_runtime
if 'run_callback_receiver' in sys.argv:
logger = logging.getLogger('awx.main.commands.run_callback_receiver')
@@ -63,7 +62,7 @@ class AWXConsumerBase(object):
def control(self, body):
logger.warning(f'Received control signal:\n{body}')
control = body.get('control')
if control in ('status', 'running', 'cancel'):
if control in ('status', 'running'):
reply_queue = body['reply_to']
if control == 'status':
msg = '\n'.join([self.listening_on, self.pool.debug()])
@@ -72,17 +71,6 @@ class AWXConsumerBase(object):
for worker in self.pool.workers:
worker.calculate_managed_tasks()
msg.extend(worker.managed_tasks.keys())
elif control == 'cancel':
msg = []
task_ids = set(body['task_ids'])
for worker in self.pool.workers:
task = worker.current_task
if task and task['uuid'] in task_ids:
logger.warn(f'Sending SIGTERM to task id={task["uuid"]}, task={task.get("task")}, args={task.get("args")}')
os.kill(worker.pid, signal.SIGTERM)
msg.append(task['uuid'])
if task_ids and not msg:
logger.info(f'Could not locate running tasks to cancel with ids={task_ids}')
with pg_bus_conn() as conn:
conn.notify(reply_queue, json.dumps(msg))
@@ -93,9 +81,6 @@ class AWXConsumerBase(object):
logger.error('unrecognized control message: {}'.format(control))
def process_task(self, body):
if isinstance(body, dict):
body['time_ack'] = time.time()
if 'control' in body:
try:
return self.control(body)
@@ -116,7 +101,6 @@ class AWXConsumerBase(object):
self.total_messages += 1
self.record_statistics()
@log_excess_runtime(logger)
def record_statistics(self):
if time.time() - self.last_stats > 1: # buffer stat recording to once per second
try:
@@ -165,7 +149,7 @@ class AWXConsumerPG(AWXConsumerBase):
while True:
try:
with pg_bus_conn(new_connection=True) as conn:
with pg_bus_conn() as conn:
for queue in self.queues:
conn.listen(queue)
if init is False:

View File

@@ -167,27 +167,17 @@ class CallbackBrokerWorker(BaseWorker):
try:
cls.objects.bulk_create(events)
metrics_bulk_events_saved += len(events)
except Exception as exc:
logger.warning(f'Error in events bulk_create, will try indiviually up to 5 errors, error {str(exc)}')
except Exception:
# if an exception occurs, we should re-attempt to save the
# events one-by-one, because something in the list is
# broken/stale
consecutive_errors = 0
events_saved = 0
metrics_events_batch_save_errors += 1
for e in events:
try:
e.save()
events_saved += 1
consecutive_errors = 0
except Exception as exc_indv:
consecutive_errors += 1
logger.info(f'Database Error Saving individual Job Event, error {str(exc_indv)}')
if consecutive_errors >= 5:
raise
metrics_singular_events_saved += events_saved
if events_saved == 0:
raise
metrics_singular_events_saved += 1
except Exception:
logger.exception('Database Error Saving Job Event')
metrics_duration_to_save = time.perf_counter() - metrics_duration_to_save
for e in events:
if not getattr(e, '_skip_websocket_message', False):
@@ -267,18 +257,17 @@ class CallbackBrokerWorker(BaseWorker):
try:
self.flush(force=flush)
break
except (OperationalError, InterfaceError, InternalError) as exc:
except (OperationalError, InterfaceError, InternalError):
if retries >= self.MAX_RETRIES:
logger.exception('Worker could not re-establish database connectivity, giving up on one or more events.')
return
delay = 60 * retries
logger.warning(f'Database Error Flushing Job Events, retry #{retries + 1} in {delay} seconds: {str(exc)}')
logger.exception('Database Error Saving Job Event, retry #{i} in {delay} seconds:'.format(i=retries + 1, delay=delay))
django_connection.close()
time.sleep(delay)
retries += 1
except DatabaseError:
logger.exception('Database Error Flushing Job Events')
django_connection.close()
logger.exception('Database Error Saving Job Event')
break
except Exception as exc:
tb = traceback.format_exc()

View File

@@ -3,7 +3,6 @@ import logging
import importlib
import sys
import traceback
import time
from kubernetes.config import kube_config
@@ -61,19 +60,8 @@ class TaskWorker(BaseWorker):
# the callable is a class, e.g., RunJob; instantiate and
# return its `run()` method
_call = _call().run
log_extra = ''
logger_method = logger.debug
if ('time_ack' in body) and ('time_pub' in body):
time_publish = body['time_ack'] - body['time_pub']
time_waiting = time.time() - body['time_ack']
if time_waiting > 5.0 or time_publish > 5.0:
# If task too a very long time to process, add this information to the log
log_extra = f' took {time_publish:.4f} to ack, {time_waiting:.4f} in local dispatcher'
logger_method = logger.info
# don't print kwargs, they often contain launch-time secrets
logger_method(f'task {uuid} starting {task}(*{args}){log_extra}')
logger.debug('task {} starting {}(*{})'.format(uuid, task, args))
return _call(*args, **kwargs)
def perform_work(self, body):

View File

@@ -862,7 +862,7 @@ class Command(BaseCommand):
overwrite_vars=bool(options.get('overwrite_vars', False)),
)
inventory_update = inventory_source.create_inventory_update(
_eager_fields=dict(status='running', job_args=json.dumps(sys.argv), job_env=dict(os.environ.items()), job_cwd=os.getcwd())
_eager_fields=dict(job_args=json.dumps(sys.argv), job_env=dict(os.environ.items()), job_cwd=os.getcwd())
)
data = AnsibleInventoryLoader(source=source, verbosity=verbosity).load()

View File

@@ -54,7 +54,7 @@ class Command(BaseCommand):
capacity = f' capacity={x.capacity}' if x.node_type != 'hop' else ''
version = f" version={x.version or '?'}" if x.node_type != 'hop' else ''
heartbeat = f' heartbeat="{x.last_seen:%Y-%m-%d %H:%M:%S}"' if x.capacity or x.node_type == 'hop' else ''
heartbeat = f' heartbeat="{x.modified:%Y-%m-%d %H:%M:%S}"' if x.capacity or x.node_type == 'hop' else ''
print(f'\t{color}{x.hostname}{capacity} node_type={x.node_type}{version}{heartbeat}\033[0m')
print()

View File

@@ -1,14 +1,13 @@
# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved.
import logging
import yaml
from django.conf import settings
from django.core.cache import cache as django_cache
from django.core.management.base import BaseCommand
from django.db import connection as django_connection
from awx.main.dispatch import get_local_queuename
from awx.main.dispatch import get_local_queuename, reaper
from awx.main.dispatch.control import Control
from awx.main.dispatch.pool import AutoscalePool
from awx.main.dispatch.worker import AWXConsumerPG, TaskWorker
@@ -31,16 +30,7 @@ class Command(BaseCommand):
'--reload',
dest='reload',
action='store_true',
help=('cause the dispatcher to recycle all of its worker processes; running jobs will run to completion first'),
)
parser.add_argument(
'--cancel',
dest='cancel',
help=(
'Cancel a particular task id. Takes either a single id string, or a JSON list of multiple ids. '
'Can take in output from the --running argument as input to cancel all tasks. '
'Only running tasks can be canceled, queued tasks must be started before they can be canceled.'
),
help=('cause the dispatcher to recycle all of its worker processes;' 'running jobs will run to completion first'),
)
def handle(self, *arg, **options):
@@ -52,16 +42,6 @@ class Command(BaseCommand):
return
if options.get('reload'):
return Control('dispatcher').control({'control': 'reload'})
if options.get('cancel'):
cancel_str = options.get('cancel')
try:
cancel_data = yaml.safe_load(cancel_str)
except Exception:
cancel_data = [cancel_str]
if not isinstance(cancel_data, list):
cancel_data = [cancel_str]
print(Control('dispatcher').cancel(cancel_data))
return
# It's important to close these because we're _about_ to fork, and we
# don't want the forked processes to inherit the open sockets
@@ -73,6 +53,7 @@ class Command(BaseCommand):
# (like the node heartbeat)
periodic.run_continuously()
reaper.startup_reaping()
consumer = None
try:

View File

@@ -95,13 +95,8 @@ class Command(BaseCommand):
# database migrations are still running
from awx.main.models.ha import Instance
try:
executor = MigrationExecutor(connection)
migrating = bool(executor.migration_plan(executor.loader.graph.leaf_nodes()))
except Exception as exc:
logger.info(f'Error on startup of run_wsbroadcast (error: {exc}), retry in 10s...')
time.sleep(10)
return
executor = MigrationExecutor(connection)
migrating = bool(executor.migration_plan(executor.loader.graph.leaf_nodes()))
# In containerized deployments, migrations happen in the task container,
# and the services running there don't start until migrations are

View File

@@ -18,7 +18,7 @@ def forwards(apps, schema_editor):
class Migration(migrations.Migration):
dependencies = [
('main', '0169_jt_prompt_everything_on_launch'),
('main', '0164_remove_inventorysource_update_on_project_update'),
]
operations = [

View File

@@ -1,35 +0,0 @@
# Generated by Django 3.2.13 on 2022-08-10 14:03
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0164_remove_inventorysource_update_on_project_update'),
]
operations = [
migrations.AddField(
model_name='unifiedjob',
name='preferred_instance_groups_cache',
field=models.JSONField(
blank=True, default=None, editable=False, help_text='A cached list with pk values from preferred instance groups.', null=True
),
),
migrations.AddField(
model_name='unifiedjob',
name='task_impact',
field=models.PositiveIntegerField(default=0, editable=False, help_text='Number of forks an instance consumes when running this job.'),
),
migrations.AddField(
model_name='workflowapproval',
name='expires',
field=models.DateTimeField(
default=None,
editable=False,
help_text='The time this approval will expire. This is the created time plus timeout, used for filtering.',
null=True,
),
),
]

View File

@@ -1,40 +0,0 @@
# Generated by Django 3.2.13 on 2022-07-06 13:19
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('main', '0165_task_manager_refactor'),
]
operations = [
migrations.AlterField(
model_name='adhoccommandevent',
name='host',
field=models.ForeignKey(
db_constraint=False,
default=None,
editable=False,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name='ad_hoc_command_events',
to='main.host',
),
),
migrations.AlterField(
model_name='jobevent',
name='host',
field=models.ForeignKey(
db_constraint=False,
default=None,
editable=False,
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
related_name='job_events_as_primary_host',
to='main.host',
),
),
]

View File

@@ -1,57 +0,0 @@
# Generated by Django 3.2.13 on 2022-08-24 14:02
from django.db import migrations, models
import django.db.models.deletion
from awx.main.models import CredentialType
from awx.main.utils.common import set_current_apps
def setup_tower_managed_defaults(apps, schema_editor):
set_current_apps(apps)
CredentialType.setup_tower_managed_defaults(apps)
class Migration(migrations.Migration):
dependencies = [
('main', '0166_alter_jobevent_host'),
]
operations = [
migrations.AddField(
model_name='project',
name='signature_validation_credential',
field=models.ForeignKey(
blank=True,
default=None,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name='projects_signature_validation',
to='main.credential',
help_text='An optional credential used for validating files in the project against unexpected changes.',
),
),
migrations.AlterField(
model_name='credentialtype',
name='kind',
field=models.CharField(
choices=[
('ssh', 'Machine'),
('vault', 'Vault'),
('net', 'Network'),
('scm', 'Source Control'),
('cloud', 'Cloud'),
('registry', 'Container Registry'),
('token', 'Personal Access Token'),
('insights', 'Insights'),
('external', 'External'),
('kubernetes', 'Kubernetes'),
('galaxy', 'Galaxy/Automation Hub'),
('cryptography', 'Cryptography'),
],
max_length=32,
),
),
migrations.RunPython(setup_tower_managed_defaults),
]

View File

@@ -1,25 +0,0 @@
# Generated by Django 3.2.13 on 2022-09-08 16:03
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0167_project_signature_validation_credential'),
]
operations = [
migrations.AddField(
model_name='inventoryupdate',
name='scm_revision',
field=models.CharField(
blank=True,
default='',
editable=False,
help_text='The SCM Revision from the Project used for this inventory update. Only applicable to inventories source from scm',
max_length=1024,
verbose_name='SCM Revision',
),
),
]

View File

@@ -1,225 +0,0 @@
# Generated by Django 3.2.13 on 2022-09-15 14:07
import awx.main.fields
import awx.main.utils.polymorphic
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('main', '0168_inventoryupdate_scm_revision'),
]
operations = [
migrations.AddField(
model_name='joblaunchconfig',
name='execution_environment',
field=models.ForeignKey(
blank=True,
default=None,
help_text='The container image to be used for execution.',
null=True,
on_delete=awx.main.utils.polymorphic.SET_NULL,
related_name='joblaunchconfig_as_prompt',
to='main.executionenvironment',
),
),
migrations.AddField(
model_name='joblaunchconfig',
name='labels',
field=models.ManyToManyField(related_name='joblaunchconfig_labels', to='main.Label'),
),
migrations.AddField(
model_name='jobtemplate',
name='ask_execution_environment_on_launch',
field=awx.main.fields.AskForField(blank=True, default=False),
),
migrations.AddField(
model_name='jobtemplate',
name='ask_forks_on_launch',
field=awx.main.fields.AskForField(blank=True, default=False),
),
migrations.AddField(
model_name='jobtemplate',
name='ask_instance_groups_on_launch',
field=awx.main.fields.AskForField(blank=True, default=False),
),
migrations.AddField(
model_name='jobtemplate',
name='ask_job_slice_count_on_launch',
field=awx.main.fields.AskForField(blank=True, default=False),
),
migrations.AddField(
model_name='jobtemplate',
name='ask_labels_on_launch',
field=awx.main.fields.AskForField(blank=True, default=False),
),
migrations.AddField(
model_name='jobtemplate',
name='ask_timeout_on_launch',
field=awx.main.fields.AskForField(blank=True, default=False),
),
migrations.AddField(
model_name='schedule',
name='execution_environment',
field=models.ForeignKey(
blank=True,
default=None,
help_text='The container image to be used for execution.',
null=True,
on_delete=awx.main.utils.polymorphic.SET_NULL,
related_name='schedule_as_prompt',
to='main.executionenvironment',
),
),
migrations.AddField(
model_name='schedule',
name='labels',
field=models.ManyToManyField(related_name='schedule_labels', to='main.Label'),
),
migrations.AddField(
model_name='workflowjobnode',
name='execution_environment',
field=models.ForeignKey(
blank=True,
default=None,
help_text='The container image to be used for execution.',
null=True,
on_delete=awx.main.utils.polymorphic.SET_NULL,
related_name='workflowjobnode_as_prompt',
to='main.executionenvironment',
),
),
migrations.AddField(
model_name='workflowjobnode',
name='labels',
field=models.ManyToManyField(related_name='workflowjobnode_labels', to='main.Label'),
),
migrations.AddField(
model_name='workflowjobtemplate',
name='ask_labels_on_launch',
field=awx.main.fields.AskForField(blank=True, default=False),
),
migrations.AddField(
model_name='workflowjobtemplate',
name='ask_skip_tags_on_launch',
field=awx.main.fields.AskForField(blank=True, default=False),
),
migrations.AddField(
model_name='workflowjobtemplate',
name='ask_tags_on_launch',
field=awx.main.fields.AskForField(blank=True, default=False),
),
migrations.AddField(
model_name='workflowjobtemplatenode',
name='execution_environment',
field=models.ForeignKey(
blank=True,
default=None,
help_text='The container image to be used for execution.',
null=True,
on_delete=awx.main.utils.polymorphic.SET_NULL,
related_name='workflowjobtemplatenode_as_prompt',
to='main.executionenvironment',
),
),
migrations.AddField(
model_name='workflowjobtemplatenode',
name='labels',
field=models.ManyToManyField(related_name='workflowjobtemplatenode_labels', to='main.Label'),
),
migrations.CreateModel(
name='WorkflowJobTemplateNodeBaseInstanceGroupMembership',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('position', models.PositiveIntegerField(db_index=True, default=None, null=True)),
('instancegroup', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.instancegroup')),
('workflowjobtemplatenode', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.workflowjobtemplatenode')),
],
),
migrations.CreateModel(
name='WorkflowJobNodeBaseInstanceGroupMembership',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('position', models.PositiveIntegerField(db_index=True, default=None, null=True)),
('instancegroup', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.instancegroup')),
('workflowjobnode', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.workflowjobnode')),
],
),
migrations.CreateModel(
name='WorkflowJobInstanceGroupMembership',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('position', models.PositiveIntegerField(db_index=True, default=None, null=True)),
('instancegroup', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.instancegroup')),
('workflowjobnode', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.workflowjob')),
],
),
migrations.CreateModel(
name='ScheduleInstanceGroupMembership',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('position', models.PositiveIntegerField(db_index=True, default=None, null=True)),
('instancegroup', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.instancegroup')),
('schedule', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.schedule')),
],
),
migrations.CreateModel(
name='JobLaunchConfigInstanceGroupMembership',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('position', models.PositiveIntegerField(db_index=True, default=None, null=True)),
('instancegroup', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.instancegroup')),
('joblaunchconfig', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.joblaunchconfig')),
],
),
migrations.AddField(
model_name='joblaunchconfig',
name='instance_groups',
field=awx.main.fields.OrderedManyToManyField(
blank=True, editable=False, related_name='joblaunchconfigs', through='main.JobLaunchConfigInstanceGroupMembership', to='main.InstanceGroup'
),
),
migrations.AddField(
model_name='schedule',
name='instance_groups',
field=awx.main.fields.OrderedManyToManyField(
blank=True, editable=False, related_name='schedule_instance_groups', through='main.ScheduleInstanceGroupMembership', to='main.InstanceGroup'
),
),
migrations.AddField(
model_name='workflowjob',
name='instance_groups',
field=awx.main.fields.OrderedManyToManyField(
blank=True,
editable=False,
related_name='workflow_job_instance_groups',
through='main.WorkflowJobInstanceGroupMembership',
to='main.InstanceGroup',
),
),
migrations.AddField(
model_name='workflowjobnode',
name='instance_groups',
field=awx.main.fields.OrderedManyToManyField(
blank=True,
editable=False,
related_name='workflow_job_node_instance_groups',
through='main.WorkflowJobNodeBaseInstanceGroupMembership',
to='main.InstanceGroup',
),
),
migrations.AddField(
model_name='workflowjobtemplatenode',
name='instance_groups',
field=awx.main.fields.OrderedManyToManyField(
blank=True,
editable=False,
related_name='workflow_job_template_node_instance_groups',
through='main.WorkflowJobTemplateNodeBaseInstanceGroupMembership',
to='main.InstanceGroup',
),
),
]

View File

@@ -1,18 +0,0 @@
# Generated by Django 3.2.13 on 2022-09-26 20:54
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0170_node_and_link_state'),
]
operations = [
migrations.AddField(
model_name='instance',
name='health_check_started',
field=models.DateTimeField(editable=False, help_text='The last time a health check was initiated on this instance.', null=True),
),
]

View File

@@ -1,29 +0,0 @@
# Generated by Django 3.2.13 on 2022-09-29 18:10
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0171_add_health_check_started'),
]
operations = [
migrations.AddField(
model_name='inventory',
name='prevent_instance_group_fallback',
field=models.BooleanField(
default=False,
help_text='If enabled, the inventory will prevent adding any organization instance groups to the list of preferred instances groups to run associated job templates on.If this setting is enabled and you provided an empty list, the global instance groups will be applied.',
),
),
migrations.AddField(
model_name='jobtemplate',
name='prevent_instance_group_fallback',
field=models.BooleanField(
default=False,
help_text='If enabled, the job template will prevent adding any inventory or organization instance groups to the list of preferred instances groups to run on.If this setting is enabled and you provided an empty list, the global instance groups will be applied.',
),
),
]

View File

@@ -36,7 +36,7 @@ def create_clearsessions_jt(apps, schema_editor):
if created:
sched = Schedule(
name='Cleanup Expired Sessions',
rrule='DTSTART:%s RRULE:FREQ=WEEKLY;INTERVAL=1' % schedule_time,
rrule='DTSTART:%s RRULE:FREQ=WEEKLY;INTERVAL=1;COUNT=1' % schedule_time,
description='Cleans out expired browser sessions',
enabled=True,
created=now_dt,
@@ -69,7 +69,7 @@ def create_cleartokens_jt(apps, schema_editor):
if created:
sched = Schedule(
name='Cleanup Expired OAuth 2 Tokens',
rrule='DTSTART:%s RRULE:FREQ=WEEKLY;INTERVAL=1' % schedule_time,
rrule='DTSTART:%s RRULE:FREQ=WEEKLY;INTERVAL=1;COUNT=1' % schedule_time,
description='Removes expired OAuth 2 access and refresh tokens',
enabled=True,
created=now_dt,

View File

@@ -90,9 +90,6 @@ class AdHocCommand(UnifiedJob, JobNotificationMixin):
extra_vars_dict = VarsDictProperty('extra_vars', True)
def _set_default_dependencies_processed(self):
self.dependencies_processed = True
def clean_inventory(self):
inv = self.inventory
if not inv:
@@ -181,12 +178,12 @@ class AdHocCommand(UnifiedJob, JobNotificationMixin):
def get_passwords_needed_to_start(self):
return self.passwords_needed_to_start
def _get_task_impact(self):
@property
def task_impact(self):
# NOTE: We sorta have to assume the host count matches and that forks default to 5
if self.inventory:
count_hosts = self.inventory.total_hosts
else:
count_hosts = 5
from awx.main.models.inventory import Host
count_hosts = Host.objects.filter(enabled=True, inventory__ad_hoc_commands__pk=self.pk).count()
return min(count_hosts, 5 if self.forks == 0 else self.forks) + 1
def copy(self):
@@ -210,32 +207,23 @@ class AdHocCommand(UnifiedJob, JobNotificationMixin):
def save(self, *args, **kwargs):
update_fields = kwargs.get('update_fields', [])
def add_to_update_fields(name):
if name not in update_fields:
update_fields.append(name)
if not self.preferred_instance_groups_cache:
self.preferred_instance_groups_cache = self._get_preferred_instance_group_cache()
add_to_update_fields("preferred_instance_groups_cache")
if not self.name:
self.name = Truncator(u': '.join(filter(None, (self.module_name, self.module_args)))).chars(512)
add_to_update_fields("name")
if self.task_impact == 0:
self.task_impact = self._get_task_impact()
add_to_update_fields("task_impact")
if 'name' not in update_fields:
update_fields.append('name')
super(AdHocCommand, self).save(*args, **kwargs)
@property
def preferred_instance_groups(self):
selected_groups = []
if self.inventory is not None and self.inventory.organization is not None:
organization_groups = [x for x in self.inventory.organization.instance_groups.all()]
else:
organization_groups = []
if self.inventory is not None:
for instance_group in self.inventory.instance_groups.all():
selected_groups.append(instance_group)
if not self.inventory.prevent_instance_group_fallback and self.inventory.organization is not None:
for instance_group in self.inventory.organization.instance_groups.all():
selected_groups.append(instance_group)
inventory_groups = [x for x in self.inventory.instance_groups.all()]
else:
inventory_groups = []
selected_groups = inventory_groups + organization_groups
if not selected_groups:
return self.global_instance_groups
return selected_groups

View File

@@ -316,17 +316,16 @@ class PrimordialModel(HasEditsMixin, CreatedModifiedModel):
user = get_current_user()
if user and not user.id:
user = None
if (not self.pk) and (user is not None) and (not self.created_by):
if not self.pk and not self.created_by:
self.created_by = user
if 'created_by' not in update_fields:
update_fields.append('created_by')
# Update modified_by if any editable fields have changed
new_values = self._get_fields_snapshot()
if (not self.pk and not self.modified_by) or self._values_have_edits(new_values):
if self.modified_by != user:
self.modified_by = user
if 'modified_by' not in update_fields:
update_fields.append('modified_by')
self.modified_by = user
if 'modified_by' not in update_fields:
update_fields.append('modified_by')
super(PrimordialModel, self).save(*args, **kwargs)
self._prior_values_store = new_values

View File

@@ -336,7 +336,6 @@ class CredentialType(CommonModelNameNotUnique):
('external', _('External')),
('kubernetes', _('Kubernetes')),
('galaxy', _('Galaxy/Automation Hub')),
('cryptography', _('Cryptography')),
)
kind = models.CharField(max_length=32, choices=KIND_CHOICES)
@@ -1172,25 +1171,6 @@ ManagedCredentialType(
},
)
ManagedCredentialType(
namespace='gpg_public_key',
kind='cryptography',
name=gettext_noop('GPG Public Key'),
inputs={
'fields': [
{
'id': 'gpg_public_key',
'label': gettext_noop('GPG Public Key'),
'type': 'string',
'secret': True,
'multiline': True,
'help_text': gettext_noop('GPG Public Key used to validate content signatures.'),
},
],
'required': ['gpg_public_key'],
},
)
class CredentialInputSource(PrimordialModel):
class Meta:

View File

@@ -25,6 +25,7 @@ analytics_logger = logging.getLogger('awx.analytics.job_events')
logger = logging.getLogger('awx.main.models.events')
__all__ = ['JobEvent', 'ProjectUpdateEvent', 'AdHocCommandEvent', 'InventoryUpdateEvent', 'SystemJobEvent']
@@ -485,18 +486,13 @@ class JobEvent(BasePlaybookEvent):
editable=False,
db_index=False,
)
# When we partitioned the table we accidentally "lost" the foreign key constraint.
# However this is good because the cascade on delete at the django layer was causing DB issues
# We are going to leave this as a foreign key but mark it as not having a DB relation and
# prevent cascading on delete.
host = models.ForeignKey(
'Host',
related_name='job_events_as_primary_host',
null=True,
default=None,
on_delete=models.DO_NOTHING,
on_delete=models.SET_NULL,
editable=False,
db_constraint=False,
)
host_name = models.CharField(
max_length=1024,
@@ -798,10 +794,6 @@ class AdHocCommandEvent(BaseCommandEvent):
editable=False,
db_index=False,
)
# We need to keep this as a FK in the model because AdHocCommand uses a ManyToMany field
# to hosts through adhoc_events. But in https://github.com/ansible/awx/pull/8236/ we
# removed the nulling of the field in case of a host going away before an event is saved
# so this needs to stay SET_NULL on the ORM level
host = models.ForeignKey(
'Host',
related_name='ad_hoc_command_events',
@@ -809,7 +801,6 @@ class AdHocCommandEvent(BaseCommandEvent):
default=None,
on_delete=models.SET_NULL,
editable=False,
db_constraint=False,
)
host_name = models.CharField(
max_length=1024,

View File

@@ -12,7 +12,6 @@ from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _
from django.conf import settings
from django.utils.timezone import now, timedelta
from django.db.models import Sum
import redis
from solo.models import SingletonModel
@@ -114,11 +113,6 @@ class Instance(HasPolicyEditsMixin, BaseModel):
editable=False,
help_text=_('Last time instance ran its heartbeat task for main cluster nodes. Last known connection to receptor mesh for execution nodes.'),
)
health_check_started = models.DateTimeField(
null=True,
editable=False,
help_text=_("The last time a health check was initiated on this instance."),
)
last_health_check = models.DateTimeField(
null=True,
editable=False,
@@ -184,13 +178,10 @@ class Instance(HasPolicyEditsMixin, BaseModel):
def consumed_capacity(self):
capacity_consumed = 0
if self.node_type in ('hybrid', 'execution'):
capacity_consumed += (
UnifiedJob.objects.filter(execution_node=self.hostname, status__in=('running', 'waiting')).aggregate(Sum("task_impact"))["task_impact__sum"]
or 0
)
capacity_consumed += sum(x.task_impact for x in UnifiedJob.objects.filter(execution_node=self.hostname, status__in=('running', 'waiting')))
if self.node_type in ('hybrid', 'control'):
capacity_consumed += (
settings.AWX_CONTROL_NODE_TASK_IMPACT * UnifiedJob.objects.filter(controller_node=self.hostname, status__in=('running', 'waiting')).count()
capacity_consumed += sum(
settings.AWX_CONTROL_NODE_TASK_IMPACT for x in UnifiedJob.objects.filter(controller_node=self.hostname, status__in=('running', 'waiting'))
)
return capacity_consumed
@@ -212,14 +203,6 @@ class Instance(HasPolicyEditsMixin, BaseModel):
def jobs_total(self):
return UnifiedJob.objects.filter(execution_node=self.hostname).count()
@property
def health_check_pending(self):
if self.health_check_started is None:
return False
if self.last_health_check is None:
return True
return self.health_check_started > self.last_health_check
def get_cleanup_task_kwargs(self, **kwargs):
"""
Produce options to use for the command: ansible-runner worker cleanup
@@ -249,28 +232,27 @@ class Instance(HasPolicyEditsMixin, BaseModel):
return True
if ref_time is None:
ref_time = now()
grace_period = settings.CLUSTER_NODE_HEARTBEAT_PERIOD * settings.CLUSTER_NODE_MISSED_HEARTBEAT_TOLERANCE
grace_period = settings.CLUSTER_NODE_HEARTBEAT_PERIOD * 2
if self.node_type in ('execution', 'hop'):
grace_period += settings.RECEPTOR_SERVICE_ADVERTISEMENT_PERIOD
return self.last_seen < ref_time - timedelta(seconds=grace_period)
def mark_offline(self, update_last_seen=False, perform_save=True, errors=''):
if self.node_state not in (Instance.States.READY, Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
return []
return
if self.node_state == Instance.States.UNAVAILABLE and self.errors == errors and (not update_last_seen):
return []
return
self.node_state = Instance.States.UNAVAILABLE
self.cpu_capacity = self.mem_capacity = self.capacity = 0
self.errors = errors
if update_last_seen:
self.last_seen = now()
update_fields = ['node_state', 'capacity', 'cpu_capacity', 'mem_capacity', 'errors']
if update_last_seen:
update_fields += ['last_seen']
if perform_save:
update_fields = ['node_state', 'capacity', 'cpu_capacity', 'mem_capacity', 'errors']
if update_last_seen:
update_fields += ['last_seen']
self.save(update_fields=update_fields)
return update_fields
def set_capacity_value(self):
"""Sets capacity according to capacity adjustment rule (no save)"""
@@ -328,8 +310,7 @@ class Instance(HasPolicyEditsMixin, BaseModel):
self.node_state = Instance.States.READY
update_fields.append('node_state')
else:
fields_to_update = self.mark_offline(perform_save=False, errors=errors)
update_fields.extend(fields_to_update)
self.mark_offline(perform_save=False, errors=errors)
update_fields.extend(['cpu_capacity', 'mem_capacity', 'capacity'])
# disabling activity stream will avoid extra queries, which is important for heatbeat actions
@@ -438,20 +419,6 @@ def on_instance_group_saved(sender, instance, created=False, raw=False, **kwargs
@receiver(post_save, sender=Instance)
def on_instance_saved(sender, instance, created=False, raw=False, **kwargs):
if settings.IS_K8S and instance.node_type in (Instance.Types.EXECUTION,):
if instance.node_state == Instance.States.DEPROVISIONING:
from awx.main.tasks.receptor import remove_deprovisioned_node # prevents circular import
# wait for jobs on the node to complete, then delete the
# node and kick off write_receptor_config
connection.on_commit(lambda: remove_deprovisioned_node.apply_async([instance.hostname]))
if instance.node_state == Instance.States.INSTALLED:
from awx.main.tasks.receptor import write_receptor_config # prevents circular import
# broadcast to all control instances to update their receptor configs
connection.on_commit(lambda: write_receptor_config.apply_async(queue='tower_broadcast_all'))
if created or instance.has_policy_changes():
schedule_policy_task()
@@ -498,58 +465,3 @@ class InventoryInstanceGroupMembership(models.Model):
default=None,
db_index=True,
)
class JobLaunchConfigInstanceGroupMembership(models.Model):
joblaunchconfig = models.ForeignKey('JobLaunchConfig', on_delete=models.CASCADE)
instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
position = models.PositiveIntegerField(
null=True,
default=None,
db_index=True,
)
class ScheduleInstanceGroupMembership(models.Model):
schedule = models.ForeignKey('Schedule', on_delete=models.CASCADE)
instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
position = models.PositiveIntegerField(
null=True,
default=None,
db_index=True,
)
class WorkflowJobTemplateNodeBaseInstanceGroupMembership(models.Model):
workflowjobtemplatenode = models.ForeignKey('WorkflowJobTemplateNode', on_delete=models.CASCADE)
instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
position = models.PositiveIntegerField(
null=True,
default=None,
db_index=True,
)
class WorkflowJobNodeBaseInstanceGroupMembership(models.Model):
workflowjobnode = models.ForeignKey('WorkflowJobNode', on_delete=models.CASCADE)
instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
position = models.PositiveIntegerField(
null=True,
default=None,
db_index=True,
)
class WorkflowJobInstanceGroupMembership(models.Model):
workflowjobnode = models.ForeignKey('WorkflowJob', on_delete=models.CASCADE)
instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
position = models.PositiveIntegerField(
null=True,
default=None,
db_index=True,
)

View File

@@ -63,7 +63,7 @@ class Inventory(CommonModelNameNotUnique, ResourceMixin, RelatedJobsMixin):
an inventory source contains lists and hosts.
"""
FIELDS_TO_PRESERVE_AT_COPY = ['hosts', 'groups', 'instance_groups', 'prevent_instance_group_fallback']
FIELDS_TO_PRESERVE_AT_COPY = ['hosts', 'groups', 'instance_groups']
KIND_CHOICES = [
('', _('Hosts have a direct link to this inventory.')),
('smart', _('Hosts for inventory generated using the host_filter property.')),
@@ -175,16 +175,6 @@ class Inventory(CommonModelNameNotUnique, ResourceMixin, RelatedJobsMixin):
related_name='inventory_labels',
help_text=_('Labels associated with this inventory.'),
)
prevent_instance_group_fallback = models.BooleanField(
default=False,
help_text=(
"If enabled, the inventory will prevent adding any organization "
"instance groups to the list of preferred instances groups to run "
"associated job templates on."
"If this setting is enabled and you provided an empty list, the global instance "
"groups will be applied."
),
)
def get_absolute_url(self, request=None):
return reverse('api:inventory_detail', kwargs={'pk': self.pk}, request=request)
@@ -246,12 +236,6 @@ class Inventory(CommonModelNameNotUnique, ResourceMixin, RelatedJobsMixin):
raise ParseError(_('Slice number must be 1 or higher.'))
return (number, step)
def get_sliced_hosts(self, host_queryset, slice_number, slice_count):
if slice_count > 1 and slice_number > 0:
offset = slice_number - 1
host_queryset = host_queryset[offset::slice_count]
return host_queryset
def get_script_data(self, hostvars=False, towervars=False, show_all=False, slice_number=1, slice_count=1):
hosts_kw = dict()
if not show_all:
@@ -259,8 +243,10 @@ class Inventory(CommonModelNameNotUnique, ResourceMixin, RelatedJobsMixin):
fetch_fields = ['name', 'id', 'variables', 'inventory_id']
if towervars:
fetch_fields.append('enabled')
host_queryset = self.hosts.filter(**hosts_kw).order_by('name').only(*fetch_fields)
hosts = self.get_sliced_hosts(host_queryset, slice_number, slice_count)
hosts = self.hosts.filter(**hosts_kw).order_by('name').only(*fetch_fields)
if slice_count > 1 and slice_number > 0:
offset = slice_number - 1
hosts = hosts[offset::slice_count]
data = dict()
all_group = data.setdefault('all', dict())
@@ -351,12 +337,9 @@ class Inventory(CommonModelNameNotUnique, ResourceMixin, RelatedJobsMixin):
else:
active_inventory_sources = self.inventory_sources.filter(source__in=CLOUD_INVENTORY_SOURCES)
failed_inventory_sources = active_inventory_sources.filter(last_job_failed=True)
total_hosts = active_hosts.count()
# if total_hosts has changed, set update_task_impact to True
update_task_impact = total_hosts != self.total_hosts
computed_fields = {
'has_active_failures': bool(failed_hosts.count()),
'total_hosts': total_hosts,
'total_hosts': active_hosts.count(),
'hosts_with_active_failures': failed_hosts.count(),
'total_groups': active_groups.count(),
'has_inventory_sources': bool(active_inventory_sources.count()),
@@ -374,14 +357,6 @@ class Inventory(CommonModelNameNotUnique, ResourceMixin, RelatedJobsMixin):
computed_fields.pop(field)
if computed_fields:
iobj.save(update_fields=computed_fields.keys())
if update_task_impact:
# if total hosts count has changed, re-calculate task_impact for any
# job that is still in pending for this inventory, since task_impact
# is cached on task creation and used in task management system
tasks = self.jobs.filter(status="pending")
for t in tasks:
t.task_impact = t._get_task_impact()
UnifiedJob.objects.bulk_update(tasks, ['task_impact'])
logger.debug("Finished updating inventory computed fields, pk={0}, in " "{1:.3f} seconds".format(self.pk, time.time() - start_time))
def websocket_emit_status(self, status):
@@ -1201,14 +1176,6 @@ class InventoryUpdate(UnifiedJob, InventorySourceOptions, JobNotificationMixin,
default=None,
null=True,
)
scm_revision = models.CharField(
max_length=1024,
blank=True,
default='',
editable=False,
verbose_name=_('SCM Revision'),
help_text=_('The SCM Revision from the Project used for this inventory update. Only applicable to inventories source from scm'),
)
@property
def is_container_group_task(self):
@@ -1253,7 +1220,8 @@ class InventoryUpdate(UnifiedJob, InventorySourceOptions, JobNotificationMixin,
return UnpartitionedInventoryUpdateEvent
return InventoryUpdateEvent
def _get_task_impact(self):
@property
def task_impact(self):
return 1
# InventoryUpdate credential required
@@ -1278,19 +1246,15 @@ class InventoryUpdate(UnifiedJob, InventorySourceOptions, JobNotificationMixin,
@property
def preferred_instance_groups(self):
selected_groups = []
if self.inventory_source.inventory is not None and self.inventory_source.inventory.organization is not None:
organization_groups = [x for x in self.inventory_source.inventory.organization.instance_groups.all()]
else:
organization_groups = []
if self.inventory_source.inventory is not None:
# Add the inventory sources IG to the selected IGs first
for instance_group in self.inventory_source.inventory.instance_groups.all():
selected_groups.append(instance_group)
# If the inventory allows for fallback and we have an organization then also append the orgs IGs to the end of the list
if (
not getattr(self.inventory_source.inventory, 'prevent_instance_group_fallback', False)
and self.inventory_source.inventory.organization is not None
):
for instance_group in self.inventory_source.inventory.organization.instance_groups.all():
selected_groups.append(instance_group)
inventory_groups = [x for x in self.inventory_source.inventory.instance_groups.all()]
else:
inventory_groups = []
selected_groups = inventory_groups + organization_groups
if not selected_groups:
return self.global_instance_groups
return selected_groups

View File

@@ -43,8 +43,8 @@ from awx.main.models.notifications import (
NotificationTemplate,
JobNotificationMixin,
)
from awx.main.utils import parse_yaml_or_json, getattr_dne, NullablePromptPseudoField, polymorphic
from awx.main.fields import ImplicitRoleField, AskForField, JSONBlob, OrderedManyToManyField
from awx.main.utils import parse_yaml_or_json, getattr_dne, NullablePromptPseudoField
from awx.main.fields import ImplicitRoleField, AskForField, JSONBlob
from awx.main.models.mixins import (
ResourceMixin,
SurveyJobTemplateMixin,
@@ -203,7 +203,7 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
playbook) to an inventory source with a given credential.
"""
FIELDS_TO_PRESERVE_AT_COPY = ['labels', 'instance_groups', 'credentials', 'survey_spec', 'prevent_instance_group_fallback']
FIELDS_TO_PRESERVE_AT_COPY = ['labels', 'instance_groups', 'credentials', 'survey_spec']
FIELDS_TO_DISCARD_AT_COPY = ['vault_credential', 'credential']
SOFT_UNIQUE_TOGETHER = [('polymorphic_ctype', 'name', 'organization')]
@@ -227,6 +227,15 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
blank=True,
default=False,
)
ask_limit_on_launch = AskForField(
blank=True,
default=False,
)
ask_tags_on_launch = AskForField(blank=True, default=False, allows_field='job_tags')
ask_skip_tags_on_launch = AskForField(
blank=True,
default=False,
)
ask_job_type_on_launch = AskForField(
blank=True,
default=False,
@@ -235,27 +244,12 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
blank=True,
default=False,
)
ask_inventory_on_launch = AskForField(
blank=True,
default=False,
)
ask_credential_on_launch = AskForField(blank=True, default=False, allows_field='credentials')
ask_execution_environment_on_launch = AskForField(
blank=True,
default=False,
)
ask_forks_on_launch = AskForField(
blank=True,
default=False,
)
ask_job_slice_count_on_launch = AskForField(
blank=True,
default=False,
)
ask_timeout_on_launch = AskForField(
blank=True,
default=False,
)
ask_instance_groups_on_launch = AskForField(
blank=True,
default=False,
)
ask_scm_branch_on_launch = AskForField(blank=True, default=False, allows_field='scm_branch')
job_slice_count = models.PositiveIntegerField(
blank=True,
default=1,
@@ -274,15 +268,6 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
'admin_role',
],
)
prevent_instance_group_fallback = models.BooleanField(
default=False,
help_text=(
"If enabled, the job template will prevent adding any inventory or organization "
"instance groups to the list of preferred instances groups to run on."
"If this setting is enabled and you provided an empty list, the global instance "
"groups will be applied."
),
)
@classmethod
def _get_unified_job_class(cls):
@@ -291,17 +276,7 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
@classmethod
def _get_unified_job_field_names(cls):
return set(f.name for f in JobOptions._meta.fields) | set(
[
'name',
'description',
'organization',
'survey_passwords',
'labels',
'credentials',
'job_slice_number',
'job_slice_count',
'execution_environment',
]
['name', 'description', 'organization', 'survey_passwords', 'labels', 'credentials', 'job_slice_number', 'job_slice_count', 'execution_environment']
)
@property
@@ -339,13 +314,10 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
actual_inventory = self.inventory
if self.ask_inventory_on_launch and 'inventory' in kwargs:
actual_inventory = kwargs['inventory']
actual_slice_count = self.job_slice_count
if self.ask_job_slice_count_on_launch and 'job_slice_count' in kwargs:
actual_slice_count = kwargs['job_slice_count']
if actual_inventory:
return min(actual_slice_count, actual_inventory.hosts.count())
return min(self.job_slice_count, actual_inventory.hosts.count())
else:
return actual_slice_count
return self.job_slice_count
def save(self, *args, **kwargs):
update_fields = kwargs.get('update_fields', [])
@@ -453,15 +425,10 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
field = self._meta.get_field(field_name)
if isinstance(field, models.ManyToManyField):
if field_name == 'instance_groups':
# Instance groups are ordered so we can't make a set out of them
old_value = old_value.all()
elif field_name == 'credentials':
# Credentials have a weird pattern because of how they are layered
old_value = set(old_value.all())
new_value = set(kwargs[field_name]) - old_value
if not new_value:
continue
old_value = set(old_value.all())
new_value = set(kwargs[field_name]) - old_value
if not new_value:
continue
if new_value == old_value:
# no-op case: Fields the same as template's value
@@ -482,10 +449,6 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
rejected_data[field_name] = new_value
errors_dict[field_name] = _('Project does not allow override of branch.')
continue
elif field_name == 'job_slice_count' and (new_value > 1) and (self.get_effective_slice_ct(kwargs) <= 1):
rejected_data[field_name] = new_value
errors_dict[field_name] = _('Job inventory does not have enough hosts for slicing')
continue
# accepted prompt
prompted_data[field_name] = new_value
else:
@@ -637,19 +600,6 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
def get_ui_url(self):
return urljoin(settings.TOWER_URL_BASE, "/#/jobs/playbook/{}".format(self.pk))
def _set_default_dependencies_processed(self):
"""
This sets the initial value of dependencies_processed
and here we use this as a shortcut to avoid the DependencyManager for jobs that do not need it
"""
if (not self.project) or self.project.scm_update_on_launch:
self.dependencies_processed = False
elif (not self.inventory) or self.inventory.inventory_sources.filter(update_on_launch=True).exists():
self.dependencies_processed = False
else:
# No dependencies to process
self.dependencies_processed = True
@property
def event_class(self):
if self.has_unpartitioned_events:
@@ -694,7 +644,8 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
raise ParseError(_('{status_value} is not a valid status option.').format(status_value=status))
return self._get_hosts(**kwargs)
def _get_task_impact(self):
@property
def task_impact(self):
if self.launch_type == 'callback':
count_hosts = 2
else:
@@ -804,15 +755,19 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
@property
def preferred_instance_groups(self):
# If the user specified instance groups those will be handled by the unified_job.create_unified_job
# This function handles only the defaults for a template w/o user specification
selected_groups = []
for obj_type in ['job_template', 'inventory', 'organization']:
if getattr(self, obj_type) is not None:
for instance_group in getattr(self, obj_type).instance_groups.all():
selected_groups.append(instance_group)
if getattr(getattr(self, obj_type), 'prevent_instance_group_fallback', False):
break
if self.organization is not None:
organization_groups = [x for x in self.organization.instance_groups.all()]
else:
organization_groups = []
if self.inventory is not None:
inventory_groups = [x for x in self.inventory.instance_groups.all()]
else:
inventory_groups = []
if self.job_template is not None:
template_groups = [x for x in self.job_template.instance_groups.all()]
else:
template_groups = []
selected_groups = template_groups + inventory_groups + organization_groups
if not selected_groups:
return self.global_instance_groups
return selected_groups
@@ -847,8 +802,7 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
def _get_inventory_hosts(self, only=['name', 'ansible_facts', 'ansible_facts_modified', 'modified', 'inventory_id']):
if not self.inventory:
return []
host_queryset = self.inventory.hosts.only(*only)
return self.inventory.get_sliced_hosts(host_queryset, self.job_slice_number, self.job_slice_count)
return self.inventory.hosts.only(*only)
def start_job_fact_cache(self, destination, modification_times, timeout=None):
self.log_lifecycle("start_job_fact_cache")
@@ -893,7 +847,7 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
continue
host.ansible_facts = ansible_facts
host.ansible_facts_modified = now()
host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
host.save()
system_tracking_logger.info(
'New fact for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)),
extra=dict(
@@ -939,36 +893,10 @@ class LaunchTimeConfigBase(BaseModel):
# This is a solution to the nullable CharField problem, specific to prompting
char_prompts = JSONBlob(default=dict, blank=True)
# Define fields that are not really fields, but alias to char_prompts lookups
limit = NullablePromptPseudoField('limit')
scm_branch = NullablePromptPseudoField('scm_branch')
job_tags = NullablePromptPseudoField('job_tags')
skip_tags = NullablePromptPseudoField('skip_tags')
diff_mode = NullablePromptPseudoField('diff_mode')
job_type = NullablePromptPseudoField('job_type')
verbosity = NullablePromptPseudoField('verbosity')
forks = NullablePromptPseudoField('forks')
job_slice_count = NullablePromptPseudoField('job_slice_count')
timeout = NullablePromptPseudoField('timeout')
# NOTE: additional fields are assumed to exist but must be defined in subclasses
# due to technical limitations
SUBCLASS_FIELDS = (
'instance_groups', # needs a through model defined
'extra_vars', # alternates between extra_vars and extra_data
'credentials', # already a unified job and unified JT field
'labels', # already a unified job and unified JT field
'execution_environment', # already a unified job and unified JT field
)
def prompts_dict(self, display=False, for_cls=None):
def prompts_dict(self, display=False):
data = {}
if for_cls:
cls = for_cls
else:
cls = JobTemplate
# Some types may have different prompts, but always subset of JT prompts
for prompt_name in cls.get_ask_mapping().keys():
for prompt_name in JobTemplate.get_ask_mapping().keys():
try:
field = self._meta.get_field(prompt_name)
except FieldDoesNotExist:
@@ -976,23 +904,18 @@ class LaunchTimeConfigBase(BaseModel):
if isinstance(field, models.ManyToManyField):
if not self.pk:
continue # unsaved object can't have related many-to-many
prompt_values = list(getattr(self, prompt_name).all())
# Many to manys can't distinguish between None and []
# Because of this, from a config perspective, we assume [] is none and we don't save [] into the config
if len(prompt_values) > 0:
data[prompt_name] = prompt_values
prompt_val = set(getattr(self, prompt_name).all())
if len(prompt_val) > 0:
data[prompt_name] = prompt_val
elif prompt_name == 'extra_vars':
if self.extra_vars:
extra_vars = {}
if display:
extra_vars = self.display_extra_vars()
data[prompt_name] = self.display_extra_vars()
else:
extra_vars = self.extra_vars
data[prompt_name] = self.extra_vars
# Depending on model, field type may save and return as string
if isinstance(extra_vars, str):
extra_vars = parse_yaml_or_json(extra_vars)
if extra_vars:
data['extra_vars'] = extra_vars
if isinstance(data[prompt_name], str):
data[prompt_name] = parse_yaml_or_json(data[prompt_name])
if self.survey_passwords and not display:
data['survey_passwords'] = self.survey_passwords
else:
@@ -1002,6 +925,15 @@ class LaunchTimeConfigBase(BaseModel):
return data
for field_name in JobTemplate.get_ask_mapping().keys():
if field_name == 'extra_vars':
continue
try:
LaunchTimeConfigBase._meta.get_field(field_name)
except FieldDoesNotExist:
setattr(LaunchTimeConfigBase, field_name, NullablePromptPseudoField(field_name))
class LaunchTimeConfig(LaunchTimeConfigBase):
"""
Common model for all objects that save details of a saved launch config
@@ -1020,18 +952,8 @@ class LaunchTimeConfig(LaunchTimeConfigBase):
blank=True,
)
)
# Fields needed for non-unified job / unified JT models, because they are defined on unified models
# Credentials needed for non-unified job / unified JT models
credentials = models.ManyToManyField('Credential', related_name='%(class)ss')
labels = models.ManyToManyField('Label', related_name='%(class)s_labels')
execution_environment = models.ForeignKey(
'ExecutionEnvironment',
null=True,
blank=True,
default=None,
on_delete=polymorphic.SET_NULL,
related_name='%(class)s_as_prompt',
help_text="The container image to be used for execution.",
)
@property
def extra_vars(self):
@@ -1075,11 +997,6 @@ class JobLaunchConfig(LaunchTimeConfig):
editable=False,
)
# Instance Groups needed for non-unified job / unified JT models
instance_groups = OrderedManyToManyField(
'InstanceGroup', related_name='%(class)ss', blank=True, editable=False, through='JobLaunchConfigInstanceGroupMembership'
)
def has_user_prompts(self, template):
"""
Returns True if any fields exist in the launch config that are
@@ -1296,9 +1213,6 @@ class SystemJob(UnifiedJob, SystemJobOptions, JobNotificationMixin):
extra_vars_dict = VarsDictProperty('extra_vars', True)
def _set_default_dependencies_processed(self):
self.dependencies_processed = True
@classmethod
def _get_parent_field_name(cls):
return 'system_job_template'
@@ -1324,7 +1238,8 @@ class SystemJob(UnifiedJob, SystemJobOptions, JobNotificationMixin):
return UnpartitionedSystemJobEvent
return SystemJobEvent
def _get_task_impact(self):
@property
def task_impact(self):
return 5
@property

View File

@@ -10,8 +10,6 @@ from awx.api.versioning import reverse
from awx.main.models.base import CommonModelNameNotUnique
from awx.main.models.unified_jobs import UnifiedJobTemplate, UnifiedJob
from awx.main.models.inventory import Inventory
from awx.main.models.schedules import Schedule
from awx.main.models.workflow import WorkflowJobTemplateNode, WorkflowJobNode
__all__ = ('Label',)
@@ -36,22 +34,16 @@ class Label(CommonModelNameNotUnique):
def get_absolute_url(self, request=None):
return reverse('api:label_detail', kwargs={'pk': self.pk}, request=request)
@staticmethod
def get_orphaned_labels():
return Label.objects.filter(organization=None, unifiedjobtemplate_labels__isnull=True, inventory_labels__isnull=True)
def is_detached(self):
return Label.objects.filter(
id=self.id,
unifiedjob_labels__isnull=True,
unifiedjobtemplate_labels__isnull=True,
inventory_labels__isnull=True,
schedule_labels__isnull=True,
workflowjobtemplatenode_labels__isnull=True,
workflowjobnode_labels__isnull=True,
).exists()
return Label.objects.filter(id=self.id, unifiedjob_labels__isnull=True, unifiedjobtemplate_labels__isnull=True, inventory_labels__isnull=True).exists()
def is_candidate_for_detach(self):
count = UnifiedJob.objects.filter(labels__in=[self.id]).count() # Both Jobs and WFJobs
count += UnifiedJobTemplate.objects.filter(labels__in=[self.id]).count() # Both JTs and WFJT
count += Inventory.objects.filter(labels__in=[self.id]).count()
count += Schedule.objects.filter(labels__in=[self.id]).count()
count += WorkflowJobTemplateNode.objects.filter(labels__in=[self.id]).count()
count += WorkflowJobNode.objects.filter(labels__in=[self.id]).count()
return (count - 1) == 0
c1 = UnifiedJob.objects.filter(labels__in=[self.id]).count()
c2 = UnifiedJobTemplate.objects.filter(labels__in=[self.id]).count()
c3 = Inventory.objects.filter(labels__in=[self.id]).count()
return (c1 + c2 + c3 - 1) == 0

View File

@@ -104,33 +104,6 @@ class SurveyJobTemplateMixin(models.Model):
default=False,
)
survey_spec = prevent_search(JSONBlob(default=dict, blank=True))
ask_inventory_on_launch = AskForField(
blank=True,
default=False,
)
ask_limit_on_launch = AskForField(
blank=True,
default=False,
)
ask_scm_branch_on_launch = AskForField(
blank=True,
default=False,
allows_field='scm_branch',
)
ask_labels_on_launch = AskForField(
blank=True,
default=False,
)
ask_tags_on_launch = AskForField(
blank=True,
default=False,
allows_field='job_tags',
)
ask_skip_tags_on_launch = AskForField(
blank=True,
default=False,
)
ask_variables_on_launch = AskForField(blank=True, default=False, allows_field='extra_vars')
def survey_password_variables(self):
@@ -439,11 +412,6 @@ class TaskManagerJobMixin(TaskManagerUnifiedJobMixin):
class Meta:
abstract = True
def get_jobs_fail_chain(self):
if self.project_update_id:
return [self.project_update]
return []
class TaskManagerUpdateOnLaunchMixin(TaskManagerUnifiedJobMixin):
class Meta:

View File

@@ -284,17 +284,6 @@ class Project(UnifiedJobTemplate, ProjectOptions, ResourceMixin, CustomVirtualEn
help_text=_('Allow changing the SCM branch or revision in a job template ' 'that uses this project.'),
)
# credential (keys) used to validate content signature
signature_validation_credential = models.ForeignKey(
'Credential',
related_name='%(class)ss_signature_validation',
blank=True,
null=True,
default=None,
on_delete=models.SET_NULL,
help_text=_('An optional credential used for validating files in the project against unexpected changes.'),
)
scm_revision = models.CharField(
max_length=1024,
blank=True,
@@ -524,9 +513,6 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
help_text=_('The SCM Revision discovered by this update for the given project and branch.'),
)
def _set_default_dependencies_processed(self):
self.dependencies_processed = True
def _get_parent_field_name(self):
return 'project'
@@ -574,7 +560,8 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
return UnpartitionedProjectUpdateEvent
return ProjectUpdateEvent
def _get_task_impact(self):
@property
def task_impact(self):
return 0 if self.job_type == 'run' else 1
@property
@@ -631,10 +618,6 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
added_update_fields = []
if not self.job_tags:
job_tags = ['update_{}'.format(self.scm_type), 'install_roles', 'install_collections']
if self.project.signature_validation_credential is not None:
credential_type = self.project.signature_validation_credential.credential_type.namespace
job_tags.append(f'validation_{credential_type}')
job_tags.append('validation_checksum_manifest')
self.job_tags = ','.join(job_tags)
added_update_fields.append('job_tags')
if self.scm_delete_on_update and 'delete' not in self.job_tags and self.job_type == 'check':

View File

@@ -18,7 +18,6 @@ from django.utils.translation import gettext_lazy as _
# AWX
from awx.api.versioning import reverse
from awx.main.fields import OrderedManyToManyField
from awx.main.models.base import PrimordialModel
from awx.main.models.jobs import LaunchTimeConfig
from awx.main.utils import ignore_inventory_computed_fields
@@ -84,13 +83,6 @@ class Schedule(PrimordialModel, LaunchTimeConfig):
)
rrule = models.TextField(help_text=_("A value representing the schedules iCal recurrence rule."))
next_run = models.DateTimeField(null=True, default=None, editable=False, help_text=_("The next time that the scheduled action will run."))
instance_groups = OrderedManyToManyField(
'InstanceGroup',
related_name='schedule_instance_groups',
blank=True,
editable=False,
through='ScheduleInstanceGroupMembership',
)
@classmethod
def get_zoneinfo(cls):

View File

@@ -45,8 +45,7 @@ from awx.main.utils.common import (
get_type_for_model,
parse_yaml_or_json,
getattr_dne,
ScheduleDependencyManager,
ScheduleTaskManager,
schedule_task_manager,
get_event_partition_epoch,
get_capacity_type,
)
@@ -332,11 +331,10 @@ class UnifiedJobTemplate(PolymorphicModel, CommonModelNameNotUnique, ExecutionEn
return NotificationTemplate.objects.none()
def create_unified_job(self, instance_groups=None, **kwargs):
def create_unified_job(self, **kwargs):
"""
Create a new unified job based on this unified job template.
"""
# TODO: rename kwargs to prompts, to set expectation that these are runtime values
new_job_passwords = kwargs.pop('survey_passwords', {})
eager_fields = kwargs.pop('_eager_fields', None)
@@ -383,14 +381,6 @@ class UnifiedJobTemplate(PolymorphicModel, CommonModelNameNotUnique, ExecutionEn
unified_job.survey_passwords = new_job_passwords
kwargs['survey_passwords'] = new_job_passwords # saved in config object for relaunch
if instance_groups:
unified_job.preferred_instance_groups_cache = [ig.id for ig in instance_groups]
else:
unified_job.preferred_instance_groups_cache = unified_job._get_preferred_instance_group_cache()
unified_job._set_default_dependencies_processed()
unified_job.task_impact = unified_job._get_task_impact()
from awx.main.signals import disable_activity_stream, activity_stream_create
with disable_activity_stream():
@@ -416,17 +406,13 @@ class UnifiedJobTemplate(PolymorphicModel, CommonModelNameNotUnique, ExecutionEn
unified_job.handle_extra_data(validated_kwargs['extra_vars'])
# Create record of provided prompts for relaunch and rescheduling
config = unified_job.create_config_from_prompts(kwargs, parent=self)
if instance_groups:
for ig in instance_groups:
config.instance_groups.add(ig)
unified_job.create_config_from_prompts(kwargs, parent=self)
# manually issue the create activity stream entry _after_ M2M relations
# have been associated to the UJ
if unified_job.__class__ in activity_stream_registrar.models:
activity_stream_create(None, unified_job, True)
unified_job.log_lifecycle("created")
return unified_job
@classmethod
@@ -707,14 +693,6 @@ class UnifiedJob(
on_delete=polymorphic.SET_NULL,
help_text=_('The Instance group the job was run under'),
)
preferred_instance_groups_cache = models.JSONField(
blank=True,
null=True,
default=None,
editable=False,
help_text=_("A cached list with pk values from preferred instance groups."),
)
task_impact = models.PositiveIntegerField(default=0, editable=False, help_text=_("Number of forks an instance consumes when running this job."))
organization = models.ForeignKey(
'Organization',
blank=True,
@@ -776,9 +754,6 @@ class UnifiedJob(
def _get_parent_field_name(self):
return 'unified_job_template' # Override in subclasses.
def _get_preferred_instance_group_cache(self):
return [ig.pk for ig in self.preferred_instance_groups]
@classmethod
def _get_unified_job_template_class(cls):
"""
@@ -833,9 +808,6 @@ class UnifiedJob(
update_fields = self._update_parent_instance_no_save(parent_instance)
parent_instance.save(update_fields=update_fields)
def _set_default_dependencies_processed(self):
pass
def save(self, *args, **kwargs):
"""Save the job, with current status, to the database.
Ensure that all data is consistent before doing so.
@@ -849,8 +821,7 @@ class UnifiedJob(
# If this job already exists in the database, retrieve a copy of
# the job in its prior state.
# If update_fields are given without status, then that indicates no change
if self.pk and ((not update_fields) or ('status' in update_fields)):
if self.pk:
self_before = self.__class__.objects.get(pk=self.pk)
if self_before.status != self.status:
status_before = self_before.status
@@ -981,38 +952,22 @@ class UnifiedJob(
valid_fields.extend(['survey_passwords', 'extra_vars'])
else:
kwargs.pop('survey_passwords', None)
many_to_many_fields = []
for field_name, value in kwargs.items():
if field_name not in valid_fields:
raise Exception('Unrecognized launch config field {}.'.format(field_name))
field = None
# may use extra_data as a proxy for extra_vars
if field_name in config.SUBCLASS_FIELDS and field_name != 'extra_vars':
field = config._meta.get_field(field_name)
if isinstance(field, models.ManyToManyField):
many_to_many_fields.append(field_name)
if field_name == 'credentials':
continue
if isinstance(field, (models.ForeignKey)) and (value is None):
continue # the null value indicates not-provided for ForeignKey case
setattr(config, field_name, value)
key = field_name
if key == 'extra_vars':
key = 'extra_data'
setattr(config, key, value)
config.save()
for field_name in many_to_many_fields:
prompted_items = kwargs.get(field_name, [])
if not prompted_items:
continue
if field_name == 'instance_groups':
# Here we are doing a loop to make sure we preserve order for this Ordered field
# also do not merge IGs with parent, so this saves the literal list
for item in prompted_items:
getattr(config, field_name).add(item)
else:
# Assuming this field merges prompts with parent, save just the diff
if field_name in [field.name for field in parent._meta.get_fields()]:
prompted_items = set(prompted_items) - set(getattr(parent, field_name).all())
if prompted_items:
getattr(config, field_name).add(*prompted_items)
job_creds = set(kwargs.get('credentials', []))
if 'credentials' in [field.name for field in parent._meta.get_fields()]:
job_creds = job_creds - set(parent.credentials.all())
if job_creds:
config.credentials.add(*job_creds)
return config
@property
@@ -1071,6 +1026,7 @@ class UnifiedJob(
event_qs = self.get_event_queryset()
except NotImplementedError:
return True # Model without events, such as WFJT
self.log_lifecycle("event_processing_finished")
return self.emitted_events == event_qs.count()
def result_stdout_raw_handle(self, enforce_max_bytes=True):
@@ -1285,8 +1241,9 @@ class UnifiedJob(
except JobLaunchConfig.DoesNotExist:
return False
def _get_task_impact(self):
return self.task_impact # return default, should implement in subclass.
@property
def task_impact(self):
raise NotImplementedError # Implement in subclass.
def websocket_emit_data(self):
'''Return extra data that should be included when submitting data to the browser over the websocket connection'''
@@ -1298,7 +1255,7 @@ class UnifiedJob(
def _websocket_emit_status(self, status):
try:
status_data = dict(unified_job_id=self.id, status=status)
if status == 'running':
if status == 'waiting':
if self.instance_group:
status_data['instance_group_name'] = self.instance_group.name
else:
@@ -1401,10 +1358,7 @@ class UnifiedJob(
self.update_fields(start_args=json.dumps(kwargs), status='pending')
self.websocket_emit_status("pending")
if self.dependencies_processed:
ScheduleTaskManager().schedule()
else:
ScheduleDependencyManager().schedule()
schedule_task_manager()
# Each type of unified job has a different Task class; get the
# appropirate one.
@@ -1419,6 +1373,22 @@ class UnifiedJob(
# Done!
return True
@property
def actually_running(self):
# returns True if the job is running in the appropriate dispatcher process
running = False
if all([self.status == 'running', self.celery_task_id, self.execution_node]):
# If the job is marked as running, but the dispatcher
# doesn't know about it (or the dispatcher doesn't reply),
# then cancel the job
timeout = 5
try:
running = self.celery_task_id in ControlDispatcher('dispatcher', self.controller_node or self.execution_node).running(timeout=timeout)
except (socket.timeout, RuntimeError):
logger.error('could not reach dispatcher on {} within {}s'.format(self.execution_node, timeout))
running = False
return running
@property
def can_cancel(self):
return bool(self.status in CAN_CANCEL)
@@ -1428,61 +1398,27 @@ class UnifiedJob(
return 'Previous Task Canceled: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (self.model_to_str(), self.name, self.id)
return None
def fallback_cancel(self):
if not self.celery_task_id:
self.refresh_from_db(fields=['celery_task_id'])
self.cancel_dispatcher_process()
def cancel_dispatcher_process(self):
"""Returns True if dispatcher running this job acknowledged request and sent SIGTERM"""
if not self.celery_task_id:
return
canceled = []
try:
# Use control and reply mechanism to cancel and obtain confirmation
timeout = 5
canceled = ControlDispatcher('dispatcher', self.controller_node).cancel([self.celery_task_id])
except socket.timeout:
logger.error(f'could not reach dispatcher on {self.controller_node} within {timeout}s')
except Exception:
logger.exception("error encountered when checking task status")
return bool(self.celery_task_id in canceled) # True or False, whether confirmation was obtained
def cancel(self, job_explanation=None, is_chain=False):
if self.can_cancel:
if not is_chain:
for x in self.get_jobs_fail_chain():
x.cancel(job_explanation=self._build_job_explanation(), is_chain=True)
cancel_fields = []
if not self.cancel_flag:
self.cancel_flag = True
self.start_args = '' # blank field to remove encrypted passwords
cancel_fields.extend(['cancel_flag', 'start_args'])
connection.on_commit(lambda: self.websocket_emit_status("canceled"))
cancel_fields = ['cancel_flag', 'start_args']
if self.status in ('pending', 'waiting', 'new'):
self.status = 'canceled'
cancel_fields.append('status')
if self.status == 'running' and not self.actually_running:
self.status = 'canceled'
cancel_fields.append('status')
if job_explanation is not None:
self.job_explanation = job_explanation
cancel_fields.append('job_explanation')
controller_notified = False
if self.celery_task_id:
controller_notified = self.cancel_dispatcher_process()
else:
# Avoid race condition where we have stale model from pending state but job has already started,
# its checking signal but not cancel_flag, so re-send signal after this database commit
connection.on_commit(self.fallback_cancel)
# If a SIGTERM signal was sent to the control process, and acked by the dispatcher
# then we want to let its own cleanup change status, otherwise change status now
if not controller_notified:
if self.status != 'canceled':
self.status = 'canceled'
cancel_fields.append('status')
self.save(update_fields=cancel_fields)
self.save(update_fields=cancel_fields)
self.websocket_emit_status("canceled")
return self.cancel_flag
@property
@@ -1579,8 +1515,8 @@ class UnifiedJob(
'state': state,
'work_unit_id': self.work_unit_id,
}
if self.name:
extra["task_name"] = self.name
if self.unified_job_template:
extra["template_name"] = self.unified_job_template.name
if state == "blocked" and blocked_by:
blocked_by_msg = f"{blocked_by._meta.model_name}-{blocked_by.id}"
msg = f"{self._meta.model_name}-{self.id} blocked by {blocked_by_msg}"
@@ -1592,7 +1528,7 @@ class UnifiedJob(
extra["controller_node"] = self.controller_node or "NOT_SET"
elif state == "execution_node_chosen":
extra["execution_node"] = self.execution_node or "NOT_SET"
logger_job_lifecycle.info(msg, extra=extra)
logger_job_lifecycle.debug(msg, extra=extra)
@property
def launched_by(self):

View File

@@ -13,7 +13,6 @@ from django.db import connection, models
from django.conf import settings
from django.utils.translation import gettext_lazy as _
from django.core.exceptions import ObjectDoesNotExist
from django.utils.timezone import now, timedelta
# from django import settings as tower_settings
@@ -29,7 +28,7 @@ from awx.main.models import prevent_search, accepts_json, UnifiedJobTemplate, Un
from awx.main.models.notifications import NotificationTemplate, JobNotificationMixin
from awx.main.models.base import CreatedModifiedModel, VarsDictProperty
from awx.main.models.rbac import ROLE_SINGLETON_SYSTEM_ADMINISTRATOR, ROLE_SINGLETON_SYSTEM_AUDITOR
from awx.main.fields import ImplicitRoleField, JSONBlob, OrderedManyToManyField
from awx.main.fields import ImplicitRoleField, AskForField, JSONBlob
from awx.main.models.mixins import (
ResourceMixin,
SurveyJobTemplateMixin,
@@ -41,7 +40,7 @@ from awx.main.models.mixins import (
from awx.main.models.jobs import LaunchTimeConfigBase, LaunchTimeConfig, JobTemplate
from awx.main.models.credential import Credential
from awx.main.redact import REPLACE_STR
from awx.main.utils import ScheduleWorkflowManager
from awx.main.utils import schedule_task_manager
__all__ = [
@@ -114,9 +113,6 @@ class WorkflowNodeBase(CreatedModifiedModel, LaunchTimeConfig):
'credentials',
'char_prompts',
'all_parents_must_converge',
'labels',
'instance_groups',
'execution_environment',
]
def create_workflow_job_node(self, **kwargs):
@@ -125,7 +121,7 @@ class WorkflowNodeBase(CreatedModifiedModel, LaunchTimeConfig):
"""
create_kwargs = {}
for field_name in self._get_workflow_job_field_names():
if field_name in ['credentials', 'labels', 'instance_groups']:
if field_name == 'credentials':
continue
if field_name in kwargs:
create_kwargs[field_name] = kwargs[field_name]
@@ -135,20 +131,10 @@ class WorkflowNodeBase(CreatedModifiedModel, LaunchTimeConfig):
new_node = WorkflowJobNode.objects.create(**create_kwargs)
if self.pk:
allowed_creds = self.credentials.all()
allowed_labels = self.labels.all()
allowed_instance_groups = self.instance_groups.all()
else:
allowed_creds = []
allowed_labels = []
allowed_instance_groups = []
for cred in allowed_creds:
new_node.credentials.add(cred)
for label in allowed_labels:
new_node.labels.add(label)
for instance_group in allowed_instance_groups:
new_node.instance_groups.add(instance_group)
return new_node
@@ -166,9 +152,6 @@ class WorkflowJobTemplateNode(WorkflowNodeBase):
'char_prompts',
'all_parents_must_converge',
'identifier',
'labels',
'execution_environment',
'instance_groups',
]
REENCRYPTION_BLOCKLIST_AT_COPY = ['extra_data', 'survey_passwords']
@@ -183,13 +166,6 @@ class WorkflowJobTemplateNode(WorkflowNodeBase):
blank=False,
help_text=_('An identifier for this node that is unique within its workflow. ' 'It is copied to workflow job nodes corresponding to this node.'),
)
instance_groups = OrderedManyToManyField(
'InstanceGroup',
related_name='workflow_job_template_node_instance_groups',
blank=True,
editable=False,
through='WorkflowJobTemplateNodeBaseInstanceGroupMembership',
)
class Meta:
app_label = 'main'
@@ -234,7 +210,7 @@ class WorkflowJobTemplateNode(WorkflowNodeBase):
approval_template = WorkflowApprovalTemplate(**kwargs)
approval_template.save()
self.unified_job_template = approval_template
self.save(update_fields=['unified_job_template'])
self.save()
return approval_template
@@ -273,9 +249,6 @@ class WorkflowJobNode(WorkflowNodeBase):
blank=True, # blank denotes pre-migration job nodes
help_text=_('An identifier coresponding to the workflow job template node that this node was created from.'),
)
instance_groups = OrderedManyToManyField(
'InstanceGroup', related_name='workflow_job_node_instance_groups', blank=True, editable=False, through='WorkflowJobNodeBaseInstanceGroupMembership'
)
class Meta:
app_label = 'main'
@@ -291,6 +264,19 @@ class WorkflowJobNode(WorkflowNodeBase):
def get_absolute_url(self, request=None):
return reverse('api:workflow_job_node_detail', kwargs={'pk': self.pk}, request=request)
def prompts_dict(self, *args, **kwargs):
r = super(WorkflowJobNode, self).prompts_dict(*args, **kwargs)
# Explanation - WFJT extra_vars still break pattern, so they are not
# put through prompts processing, but inventory and others are only accepted
# if JT prompts for it, so it goes through this mechanism
if self.workflow_job:
if self.workflow_job.inventory_id:
# workflow job inventory takes precedence
r['inventory'] = self.workflow_job.inventory
if self.workflow_job.char_prompts:
r.update(self.workflow_job.char_prompts)
return r
def get_job_kwargs(self):
"""
In advance of creating a new unified job as part of a workflow,
@@ -300,38 +286,16 @@ class WorkflowJobNode(WorkflowNodeBase):
"""
# reject/accept prompted fields
data = {}
wj_special_vars = {}
wj_special_passwords = {}
ujt_obj = self.unified_job_template
if ujt_obj is not None:
node_prompts_data = self.prompts_dict(for_cls=ujt_obj.__class__)
wj_prompts_data = self.workflow_job.prompts_dict(for_cls=ujt_obj.__class__)
# Explanation - special historical case
# WFJT extra_vars ignored JobTemplate.ask_variables_on_launch, bypassing _accept_or_ignore_job_kwargs
# inventory and others are only accepted if JT prompts for it with related ask_ field
# this is inconsistent, but maintained
if not isinstance(ujt_obj, WorkflowJobTemplate):
wj_special_vars = wj_prompts_data.pop('extra_vars', {})
wj_special_passwords = wj_prompts_data.pop('survey_passwords', {})
elif 'extra_vars' in node_prompts_data:
# Follow the vars combination rules
node_prompts_data['extra_vars'].update(wj_prompts_data.pop('extra_vars', {}))
elif 'survey_passwords' in node_prompts_data:
node_prompts_data['survey_passwords'].update(wj_prompts_data.pop('survey_passwords', {}))
# Follow the credential combination rules
if ('credentials' in wj_prompts_data) and ('credentials' in node_prompts_data):
wj_pivoted_creds = Credential.unique_dict(wj_prompts_data['credentials'])
node_pivoted_creds = Credential.unique_dict(node_prompts_data['credentials'])
node_pivoted_creds.update(wj_pivoted_creds)
wj_prompts_data['credentials'] = [cred for cred in node_pivoted_creds.values()]
# NOTE: no special rules for instance_groups, because they do not merge
# or labels, because they do not propogate WFJT-->node at all
# Combine WFJT prompts with node here, WFJT at higher level
node_prompts_data.update(wj_prompts_data)
accepted_fields, ignored_fields, errors = ujt_obj._accept_or_ignore_job_kwargs(**node_prompts_data)
# MERGE note: move this to prompts_dict method on node when merging
# with the workflow inventory branch
prompts_data = self.prompts_dict()
if isinstance(ujt_obj, WorkflowJobTemplate):
if self.workflow_job.extra_vars:
prompts_data.setdefault('extra_vars', {})
prompts_data['extra_vars'].update(self.workflow_job.extra_vars_dict)
accepted_fields, ignored_fields, errors = ujt_obj._accept_or_ignore_job_kwargs(**prompts_data)
if errors:
logger.info(
_('Bad launch configuration starting template {template_pk} as part of ' 'workflow {workflow_pk}. Errors:\n{error_text}').format(
@@ -339,6 +303,15 @@ class WorkflowJobNode(WorkflowNodeBase):
)
)
data.update(accepted_fields) # missing fields are handled in the scheduler
try:
# config saved on the workflow job itself
wj_config = self.workflow_job.launch_config
except ObjectDoesNotExist:
wj_config = None
if wj_config:
accepted_fields, ignored_fields, errors = ujt_obj._accept_or_ignore_job_kwargs(**wj_config.prompts_dict())
accepted_fields.pop('extra_vars', None) # merge handled with other extra_vars later
data.update(accepted_fields)
# build ancestor artifacts, save them to node model for later
aa_dict = {}
is_root_node = True
@@ -351,12 +324,15 @@ class WorkflowJobNode(WorkflowNodeBase):
self.ancestor_artifacts = aa_dict
self.save(update_fields=['ancestor_artifacts'])
# process password list
password_dict = data.get('survey_passwords', {})
password_dict = {}
if '_ansible_no_log' in aa_dict:
for key in aa_dict:
if key != '_ansible_no_log':
password_dict[key] = REPLACE_STR
password_dict.update(wj_special_passwords)
if self.workflow_job.survey_passwords:
password_dict.update(self.workflow_job.survey_passwords)
if self.survey_passwords:
password_dict.update(self.survey_passwords)
if password_dict:
data['survey_passwords'] = password_dict
# process extra_vars
@@ -366,12 +342,12 @@ class WorkflowJobNode(WorkflowNodeBase):
functional_aa_dict = copy(aa_dict)
functional_aa_dict.pop('_ansible_no_log', None)
extra_vars.update(functional_aa_dict)
# Workflow Job extra_vars higher precedence than ancestor artifacts
extra_vars.update(wj_special_vars)
if ujt_obj and isinstance(ujt_obj, JobTemplate):
# Workflow Job extra_vars higher precedence than ancestor artifacts
if self.workflow_job and self.workflow_job.extra_vars:
extra_vars.update(self.workflow_job.extra_vars_dict)
if extra_vars:
data['extra_vars'] = extra_vars
# ensure that unified jobs created by WorkflowJobs are marked
data['_eager_fields'] = {'launch_type': 'workflow'}
if self.workflow_job and self.workflow_job.created_by:
@@ -397,10 +373,6 @@ class WorkflowJobOptions(LaunchTimeConfigBase):
)
)
)
# Workflow jobs are used for sliced jobs, and thus, must be a conduit for any JT prompts
instance_groups = OrderedManyToManyField(
'InstanceGroup', related_name='workflow_job_instance_groups', blank=True, editable=False, through='WorkflowJobInstanceGroupMembership'
)
allow_simultaneous = models.BooleanField(default=False)
extra_vars_dict = VarsDictProperty('extra_vars', True)
@@ -412,7 +384,7 @@ class WorkflowJobOptions(LaunchTimeConfigBase):
@classmethod
def _get_unified_job_field_names(cls):
r = set(f.name for f in WorkflowJobOptions._meta.fields) | set(
['name', 'description', 'organization', 'survey_passwords', 'labels', 'limit', 'scm_branch', 'job_tags', 'skip_tags']
['name', 'description', 'organization', 'survey_passwords', 'labels', 'limit', 'scm_branch']
)
r.remove('char_prompts') # needed due to copying launch config to launch config
return r
@@ -452,29 +424,26 @@ class WorkflowJobOptions(LaunchTimeConfigBase):
class WorkflowJobTemplate(UnifiedJobTemplate, WorkflowJobOptions, SurveyJobTemplateMixin, ResourceMixin, RelatedJobsMixin, WebhookTemplateMixin):
SOFT_UNIQUE_TOGETHER = [('polymorphic_ctype', 'name', 'organization')]
FIELDS_TO_PRESERVE_AT_COPY = [
'labels',
'organization',
'instance_groups',
'workflow_job_template_nodes',
'credentials',
'survey_spec',
'skip_tags',
'job_tags',
'execution_environment',
]
FIELDS_TO_PRESERVE_AT_COPY = ['labels', 'organization', 'instance_groups', 'workflow_job_template_nodes', 'credentials', 'survey_spec']
class Meta:
app_label = 'main'
notification_templates_approvals = models.ManyToManyField(
"NotificationTemplate",
ask_inventory_on_launch = AskForField(
blank=True,
related_name='%(class)s_notification_templates_for_approvals',
default=False,
)
admin_role = ImplicitRoleField(
parent_role=['singleton:' + ROLE_SINGLETON_SYSTEM_ADMINISTRATOR, 'organization.workflow_admin_role'],
ask_limit_on_launch = AskForField(
blank=True,
default=False,
)
ask_scm_branch_on_launch = AskForField(
blank=True,
default=False,
)
notification_templates_approvals = models.ManyToManyField("NotificationTemplate", blank=True, related_name='%(class)s_notification_templates_for_approvals')
admin_role = ImplicitRoleField(parent_role=['singleton:' + ROLE_SINGLETON_SYSTEM_ADMINISTRATOR, 'organization.workflow_admin_role'])
execute_role = ImplicitRoleField(
parent_role=[
'admin_role',
@@ -653,9 +622,6 @@ class WorkflowJob(UnifiedJob, WorkflowJobOptions, SurveyJobMixin, JobNotificatio
)
is_sliced_job = models.BooleanField(default=False)
def _set_default_dependencies_processed(self):
self.dependencies_processed = True
@property
def workflow_nodes(self):
return self.workflow_job_nodes
@@ -702,7 +668,8 @@ class WorkflowJob(UnifiedJob, WorkflowJobOptions, SurveyJobMixin, JobNotificatio
)
return result
def _get_task_impact(self):
@property
def task_impact(self):
return 0
def get_ancestor_workflows(self):
@@ -743,25 +710,6 @@ class WorkflowJob(UnifiedJob, WorkflowJobOptions, SurveyJobMixin, JobNotificatio
artifacts.update(job.get_effective_artifacts(parents_set=new_parents_set))
return artifacts
def prompts_dict(self, *args, **kwargs):
if self.job_template_id:
# HACK: Exception for sliced jobs here, this is bad
# when sliced jobs were introduced, workflows did not have all the prompted JT fields
# so to support prompting with slicing, we abused the workflow job launch config
# these would be more properly saved on the workflow job, but it gets the wrong fields now
try:
wj_config = self.launch_config
r = wj_config.prompts_dict(*args, **kwargs)
except ObjectDoesNotExist:
r = {}
else:
r = super().prompts_dict(*args, **kwargs)
# Workflow labels and job labels are treated separately
# that means that they do not propogate from WFJT / workflow job to jobs in workflow
r.pop('labels', None)
return r
def get_notification_templates(self):
return self.workflow_job_template.notification_templates
@@ -772,10 +720,11 @@ class WorkflowJob(UnifiedJob, WorkflowJobOptions, SurveyJobMixin, JobNotificatio
def preferred_instance_groups(self):
return []
def cancel_dispatcher_process(self):
@property
def actually_running(self):
# WorkflowJobs don't _actually_ run anything in the dispatcher, so
# there's no point in asking the dispatcher if it knows about this task
return True
return self.status == 'running'
class WorkflowApprovalTemplate(UnifiedJobTemplate, RelatedJobsMixin):
@@ -834,12 +783,6 @@ class WorkflowApproval(UnifiedJob, JobNotificationMixin):
default=0,
help_text=_("The amount of time (in seconds) before the approval node expires and fails."),
)
expires = models.DateTimeField(
default=None,
null=True,
editable=False,
help_text=_("The time this approval will expire. This is the created time plus timeout, used for filtering."),
)
timed_out = models.BooleanField(default=False, help_text=_("Shows when an approval node (with a timeout assigned to it) has timed out."))
approved_or_denied_by = models.ForeignKey(
'auth.User',
@@ -850,9 +793,6 @@ class WorkflowApproval(UnifiedJob, JobNotificationMixin):
on_delete=models.SET_NULL,
)
def _set_default_dependencies_processed(self):
self.dependencies_processed = True
@classmethod
def _get_unified_job_template_class(cls):
return WorkflowApprovalTemplate
@@ -870,32 +810,13 @@ class WorkflowApproval(UnifiedJob, JobNotificationMixin):
def _get_parent_field_name(self):
return 'workflow_approval_template'
def save(self, *args, **kwargs):
    """Keep ``expires`` in sync with ``timeout``, then save the model.

    ``expires`` is recomputed as ``created + timeout`` seconds when the object
    has no primary key yet, when no ``update_fields`` restriction is given, or
    when ``'timeout'`` is among ``update_fields``. It is reset to ``None`` when
    ``timeout`` is 0. If the caller restricted the save with ``update_fields``
    and ``expires`` changed, ``'expires'`` is added to the list that is passed
    on to the parent ``save`` so the new value is actually written.
    """
    # ``update_fields`` may be missing or explicitly None; both mean "no restriction".
    update_fields = list(kwargs.get('update_fields') or [])
    timeout_in_scope = (not update_fields) or ('timeout' in update_fields)
    if self.timeout != 0 and ((not self.pk) or timeout_in_scope):
        # on creation, created will be set by parent class, so we fudge it here
        created = self.created or now()
        new_expires = created + timedelta(seconds=self.timeout)
        if new_expires != self.expires:
            self.expires = new_expires
            if update_fields and 'expires' not in update_fields:
                update_fields.append('expires')
    elif self.timeout == 0 and timeout_in_scope:
        if self.expires:
            self.expires = None
            if update_fields and 'expires' not in update_fields:
                update_fields.append('expires')
    if update_fields:
        # The list above is a copy; hand the (possibly extended) copy to the
        # parent save, otherwise the appended 'expires' would be silently lost.
        kwargs['update_fields'] = update_fields
    super(WorkflowApproval, self).save(*args, **kwargs)
def approve(self, request=None):
self.status = 'successful'
self.approved_or_denied_by = get_current_user()
self.save()
self.send_approval_notification('approved')
self.websocket_emit_status(self.status)
ScheduleWorkflowManager().schedule()
schedule_task_manager()
return reverse('api:workflow_approval_approve', kwargs={'pk': self.pk}, request=request)
def deny(self, request=None):
@@ -904,7 +825,7 @@ class WorkflowApproval(UnifiedJob, JobNotificationMixin):
self.save()
self.send_approval_notification('denied')
self.websocket_emit_status(self.status)
ScheduleWorkflowManager().schedule()
schedule_task_manager()
return reverse('api:workflow_approval_deny', kwargs={'pk': self.pk}, request=request)
def signal_start(self, **kwargs):

View File

@@ -1,6 +1,6 @@
# Copyright (c) 2017 Ansible, Inc.
#
from .task_manager import TaskManager, DependencyManager, WorkflowManager
from .task_manager import TaskManager
__all__ = ['TaskManager', 'DependencyManager', 'WorkflowManager']
__all__ = ['TaskManager']

View File

@@ -7,11 +7,6 @@ from awx.main.models import (
WorkflowJob,
)
import logging
logger = logging.getLogger('awx.main.scheduler.dependency_graph')
class DependencyGraph(object):
PROJECT_UPDATES = 'project_updates'
@@ -41,9 +36,6 @@ class DependencyGraph(object):
self.data[self.WORKFLOW_JOB_TEMPLATES_JOBS] = {}
def mark_if_no_key(self, job_type, id, job):
if id is None:
logger.warning(f'Null dependency graph key from {job}, could be integrity error or bug, ignoring')
return
# only mark first occurrence of a task. If 10 of JobA are launched
# (concurrent disabled), the dependency graph should return that jobs
# 2 through 10 are blocked by job1
@@ -74,10 +66,7 @@ class DependencyGraph(object):
self.mark_if_no_key(self.JOB_TEMPLATE_JOBS, job.job_template_id, job)
def mark_workflow_job(self, job):
if job.workflow_job_template_id:
self.mark_if_no_key(self.WORKFLOW_JOB_TEMPLATES_JOBS, job.workflow_job_template_id, job)
elif job.unified_job_template_id: # for sliced jobs
self.mark_if_no_key(self.WORKFLOW_JOB_TEMPLATES_JOBS, job.unified_job_template_id, job)
self.mark_if_no_key(self.WORKFLOW_JOB_TEMPLATES_JOBS, job.workflow_job_template_id, job)
def project_update_blocked_by(self, job):
return self.get_item(self.PROJECT_UPDATES, job.project_id)
@@ -96,13 +85,7 @@ class DependencyGraph(object):
def workflow_job_blocked_by(self, job):
if job.allow_simultaneous is False:
if job.workflow_job_template_id:
return self.get_item(self.WORKFLOW_JOB_TEMPLATES_JOBS, job.workflow_job_template_id)
elif job.unified_job_template_id:
# Sliced jobs can be either Job or WorkflowJob type, and either should block a sliced WorkflowJob
return self.get_item(self.WORKFLOW_JOB_TEMPLATES_JOBS, job.unified_job_template_id) or self.get_item(
self.JOB_TEMPLATE_JOBS, job.unified_job_template_id
)
return self.get_item(self.WORKFLOW_JOB_TEMPLATES_JOBS, job.workflow_job_template_id)
return None
def system_job_blocked_by(self, job):

View File

@@ -11,35 +11,31 @@ import sys
import signal
# Django
from django.db import transaction
from django.db import transaction, connection
from django.utils.translation import gettext_lazy as _, gettext_noop
from django.utils.timezone import now as tz_now
from django.conf import settings
from django.contrib.contenttypes.models import ContentType
# AWX
from awx.main.dispatch.reaper import reap_job
from awx.main.models import (
AdHocCommand,
Instance,
InventorySource,
InventoryUpdate,
Job,
Project,
ProjectUpdate,
SystemJob,
UnifiedJob,
WorkflowApproval,
WorkflowJob,
WorkflowJobNode,
WorkflowJobTemplate,
)
from awx.main.scheduler.dag_workflow import WorkflowDAG
from awx.main.utils.pglock import advisory_lock
from awx.main.utils import (
get_type_for_model,
ScheduleTaskManager,
ScheduleWorkflowManager,
)
from awx.main.utils.common import task_manager_bulk_reschedule
from awx.main.utils import get_type_for_model, task_manager_bulk_reschedule, schedule_task_manager
from awx.main.utils.common import create_partition
from awx.main.signals import disable_activity_stream
from awx.main.constants import ACTIVE_STATES
from awx.main.scheduler.dependency_graph import DependencyGraph
@@ -57,101 +53,167 @@ def timeit(func):
t_now = time.perf_counter()
result = func(*args, **kwargs)
dur = time.perf_counter() - t_now
args[0].subsystem_metrics.inc(f"{args[0].prefix}_{func.__name__}_seconds", dur)
args[0].subsystem_metrics.inc("task_manager_" + func.__name__ + "_seconds", dur)
return result
return inner
class TaskBase:
def __init__(self, prefix=""):
self.prefix = prefix
class TaskManager:
def __init__(self):
"""
Do NOT put database queries or other potentially expensive operations
in the task manager init. The task manager object is created every time a
job is created, transitions state, and every 30 seconds on each tower node.
More often than not, the object is destroyed quickly because the NOOP case is hit.
The NOOP case is short-circuit logic. If the task manager realizes that another instance
of the task manager is already running, then it short-circuits and decides not to run.
"""
# start task limit indicates how many pending jobs can be started on this
# .schedule() run. Starting jobs is expensive, and there is code in place to reap
# the task manager after 5 minutes. At scale, the task manager can easily take more than
# 5 minutes to start pending jobs. If this limit is reached, pending jobs
# will no longer be started and will be started on the next task manager cycle.
self.start_task_limit = settings.START_TASK_LIMIT
self.time_delta_job_explanation = timedelta(seconds=30)
self.subsystem_metrics = s_metrics.Metrics(auto_pipe_execute=False)
# initialize each metric to 0 and force metric_has_changed to true. This
# ensures each task manager metric will be overridden when pipe_execute
# is called later.
self.subsystem_metrics = s_metrics.Metrics(auto_pipe_execute=False)
self.start_time = time.time()
self.start_task_limit = settings.START_TASK_LIMIT
for m in self.subsystem_metrics.METRICS:
if m.startswith(self.prefix):
if m.startswith("task_manager"):
self.subsystem_metrics.set(m, 0)
def timed_out(self):
    """Tell whether this manager run has met or exceeded its time budget.

    Compares the time elapsed since ``self.start_time`` with
    ``settings.TASK_MANAGER_TIMEOUT``. Logs a warning and returns True once
    the budget is used up; returns False otherwise.
    """
    elapsed = time.time() - self.start_time
    if elapsed < settings.TASK_MANAGER_TIMEOUT:
        return False
    logger.warning(f"{self.prefix} manager has run for {elapsed} which is greater than TASK_MANAGER_TIMEOUT of {settings.TASK_MANAGER_TIMEOUT}.")
    return True
def after_lock_init(self, all_sorted_tasks):
    """
    Second-stage init, run only AFTER the lock is acquired, i.e. once we know
    this instance of the task manager is really going to run.

    Creates the dependency graph, the instance and instance-group helpers
    built from ``all_sorted_tasks``, and stores the control plane instance group.
    """
    self.dependency_graph = DependencyGraph()
    self.instances = instances = TaskManagerInstances(all_sorted_tasks)
    self.instance_groups = groups = TaskManagerInstanceGroups(instances_by_hostname=instances)
    self.controlplane_ig = groups.controlplane_ig
def job_blocked_by(self, task):
    """Return the job that prevents ``task`` from starting, or None.

    The dependency graph is asked first. If it reports nothing, the task's
    dependent jobs are examined in order: an active one blocks the task; a
    failed or errored one makes this method mark ``task`` as failed (saved and
    announced over the websocket) and is returned as the blocker.
    """
    # TODO: I'm not happy with this, I think blocking behavior should be decided outside of the dependency graph
    # in the old task manager this was handled as a method on each task object outside of the graph and
    # probably has the side effect of cutting down *a lot* of the logic from this task manager class
    graph_blocker = self.dependency_graph.task_blocked_by(task)
    if graph_blocker:
        return graph_blocker

    for dep in task.dependent_jobs.all():
        if dep.status in ACTIVE_STATES:
            return dep
        if dep.status not in ("error", "failed"):
            continue
        # A failed or errored dependency fails this task right away. The
        # errback on the dependency takes some time to trigger, and the task
        # must not enter running state in the meantime.
        task.status = 'failed'
        explanation_values = (get_type_for_model(type(dep)), dep.name, dep.id)
        task.job_explanation = 'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % explanation_values
        task.save(update_fields=['status', 'job_explanation'])
        task.websocket_emit_status('failed')
        return dep

    return None
@timeit
def get_tasks(self, filter_args):
wf_approval_ctype_id = ContentType.objects.get_for_model(WorkflowApproval).id
qs = (
UnifiedJob.objects.filter(**filter_args)
.exclude(launch_type='sync')
.exclude(polymorphic_ctype_id=wf_approval_ctype_id)
.order_by('created')
.prefetch_related('dependent_jobs')
def get_tasks(self, status_list=('pending', 'waiting', 'running')):
jobs = [j for j in Job.objects.filter(status__in=status_list).prefetch_related('instance_group')]
inventory_updates_qs = (
InventoryUpdate.objects.filter(status__in=status_list).exclude(source='file').prefetch_related('inventory_source', 'instance_group')
)
self.all_tasks = [t for t in qs]
inventory_updates = [i for i in inventory_updates_qs]
# Notice the job_type='check': we want to prevent implicit project updates from blocking our jobs.
project_updates = [p for p in ProjectUpdate.objects.filter(status__in=status_list, job_type='check').prefetch_related('instance_group')]
system_jobs = [s for s in SystemJob.objects.filter(status__in=status_list).prefetch_related('instance_group')]
ad_hoc_commands = [a for a in AdHocCommand.objects.filter(status__in=status_list).prefetch_related('instance_group')]
workflow_jobs = [w for w in WorkflowJob.objects.filter(status__in=status_list)]
all_tasks = sorted(jobs + project_updates + inventory_updates + system_jobs + ad_hoc_commands + workflow_jobs, key=lambda task: task.created)
return all_tasks
def record_aggregate_metrics(self, *args):
    """Count this schedule call and, if the record interval has passed, flush the metrics.

    Does nothing while ``settings.IS_TESTING()`` is true. Positional arguments
    are accepted and ignored.
    """
    if settings.IS_TESTING():
        return

    # The schedule-calls counter is incremented whether or not the other
    # metrics end up being recorded.
    s_metrics.Metrics(auto_pipe_execute=True).inc(f"{self.prefix}__schedule_calls", 1)

    # Record only when the previous recording is older than
    # SUBSYSTEM_METRICS_TASK_MANAGER_RECORD_INTERVAL, so that a short run
    # directly after a long one does not override useful metrics.
    current_time = time.time()
    seconds_since_recorded = current_time - self.subsystem_metrics.decode(f"{self.prefix}_recorded_timestamp")
    if seconds_since_recorded > settings.SUBSYSTEM_METRICS_TASK_MANAGER_RECORD_INTERVAL:
        logger.debug(f"recording {self.prefix} metrics, last recorded {seconds_since_recorded} seconds ago")
        self.subsystem_metrics.set(f"{self.prefix}_recorded_timestamp", current_time)
        self.subsystem_metrics.pipe_execute()
    else:
        logger.debug(f"skipping recording {self.prefix} metrics, last recorded {seconds_since_recorded} seconds ago")
def get_running_workflow_jobs(self):
    """Return a list of all WorkflowJob objects whose status is 'running'."""
    return list(WorkflowJob.objects.filter(status='running'))
def record_aggregate_metrics_and_exit(self, *args):
    """Record the aggregate metrics, then exit the process with status 1.

    Positional arguments are accepted and ignored.
    """
    self.record_aggregate_metrics()
    sys.exit(1)
def schedule(self):
"""Run one scheduling pass if the advisory lock for this manager can be taken.

The lock, named after ``self.prefix``, is requested without waiting. When it
is not acquired the method logs that fact and returns without calling
``_schedule``. Otherwise it installs a SIGTERM handler, calls ``_schedule``
and records the aggregate metrics.
"""
# Lock
with task_manager_bulk_reschedule():
with advisory_lock(f"{self.prefix}_lock", wait=False) as acquired:
with transaction.atomic():
if acquired is False:
logger.debug(f"Not running {self.prefix} scheduler, another task holds lock")
return
logger.debug(f"Starting {self.prefix} Scheduler")
# if sigterm due to timeout, still record metrics
signal.signal(signal.SIGTERM, self.record_aggregate_metrics_and_exit)
self._schedule()
commit_start = time.time()
# NOTE(review): block nesting is not visible here; presumably the lines below run
# after the atomic block has exited, so the measured time covers the commit - confirm.
# The commit duration is only stored for the "task_manager" prefix.
if self.prefix == "task_manager":
self.subsystem_metrics.set(f"{self.prefix}_commit_seconds", time.time() - commit_start)
self.record_aggregate_metrics()
logger.debug(f"Finishing {self.prefix} Scheduler")
class WorkflowManager(TaskBase):
def __init__(self):
super().__init__(prefix="workflow_manager")
def get_inventory_source_tasks(self, all_sorted_tasks):
    """Return the update-on-launch inventory sources for the inventories used by the given jobs.

    Only tasks that are ``Job`` instances contribute an inventory id; the
    result is a list of ``InventorySource`` objects.
    """
    job_inventory_ids = {task.inventory_id for task in all_sorted_tasks if isinstance(task, Job)}
    return list(InventorySource.objects.filter(inventory_id__in=job_inventory_ids, update_on_launch=True))
@timeit
def spawn_workflow_graph_jobs(self):
def spawn_workflow_graph_jobs(self, workflow_jobs):
for workflow_job in workflow_jobs:
if workflow_job.cancel_flag:
logger.debug('Not spawning jobs for %s because it is pending cancelation.', workflow_job.log_format)
continue
dag = WorkflowDAG(workflow_job)
spawn_nodes = dag.bfs_nodes_to_run()
if spawn_nodes:
logger.debug('Spawning jobs for %s', workflow_job.log_format)
else:
logger.debug('No nodes to spawn for %s', workflow_job.log_format)
for spawn_node in spawn_nodes:
if spawn_node.unified_job_template is None:
continue
kv = spawn_node.get_job_kwargs()
job = spawn_node.unified_job_template.create_unified_job(**kv)
spawn_node.job = job
spawn_node.save()
logger.debug('Spawned %s in %s for node %s', job.log_format, workflow_job.log_format, spawn_node.pk)
can_start = True
if isinstance(spawn_node.unified_job_template, WorkflowJobTemplate):
workflow_ancestors = job.get_ancestor_workflows()
if spawn_node.unified_job_template in set(workflow_ancestors):
can_start = False
logger.info(
'Refusing to start recursive workflow-in-workflow id={}, wfjt={}, ancestors={}'.format(
job.id, spawn_node.unified_job_template.pk, [wa.pk for wa in workflow_ancestors]
)
)
display_list = [spawn_node.unified_job_template] + workflow_ancestors
job.job_explanation = gettext_noop(
"Workflow Job spawned from workflow could not start because it " "would result in recursion (spawn order, most recent first: {})"
).format(', '.join(['<{}>'.format(tmp) for tmp in display_list]))
else:
logger.debug(
'Starting workflow-in-workflow id={}, wfjt={}, ancestors={}'.format(
job.id, spawn_node.unified_job_template.pk, [wa.pk for wa in workflow_ancestors]
)
)
if not job._resources_sufficient_for_launch():
can_start = False
job.job_explanation = gettext_noop(
"Job spawned from workflow could not start because it " "was missing a related resource such as project or inventory"
)
if can_start:
if workflow_job.start_args:
start_args = json.loads(decrypt_field(workflow_job, 'start_args'))
else:
start_args = {}
can_start = job.signal_start(**start_args)
if not can_start:
job.job_explanation = gettext_noop(
"Job spawned from workflow could not start because it " "was not in the right state or required manual credentials"
)
if not can_start:
job.status = 'failed'
job.save(update_fields=['status', 'job_explanation'])
job.websocket_emit_status('failed')
# TODO: should we emit a status on the socket here similar to tasks.py awx_periodic_scheduler() ?
# emit_websocket_notification('/socket.io/jobs', '', dict(id=))
def process_finished_workflow_jobs(self, workflow_jobs):
result = []
for workflow_job in self.all_tasks:
if self.timed_out():
logger.warning("Workflow manager has reached time out while processing running workflows, exiting loop early")
ScheduleWorkflowManager().schedule()
# Do not process any more workflow jobs. Stop here.
# Maybe we should schedule another WorkflowManager run
break
for workflow_job in workflow_jobs:
dag = WorkflowDAG(workflow_job)
status_changed = False
if workflow_job.cancel_flag:
@@ -166,106 +228,99 @@ class WorkflowManager(TaskBase):
status_changed = True
else:
workflow_nodes = dag.mark_dnr_nodes()
WorkflowJobNode.objects.bulk_update(workflow_nodes, ['do_not_run'])
# If workflow is now done, we do special things to mark it as done.
for n in workflow_nodes:
n.save(update_fields=['do_not_run'])
is_done = dag.is_workflow_done()
if is_done:
has_failed, reason = dag.has_workflow_failed()
logger.debug('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
result.append(workflow_job.id)
new_status = 'failed' if has_failed else 'successful'
logger.debug("Transitioning {} to {} status.".format(workflow_job.log_format, new_status))
update_fields = ['status', 'start_args']
workflow_job.status = new_status
if reason:
logger.info(f'Workflow job {workflow_job.id} failed due to reason: {reason}')
workflow_job.job_explanation = gettext_noop("No error handling paths found, marking workflow as failed")
update_fields.append('job_explanation')
workflow_job.start_args = '' # blank field to remove encrypted passwords
workflow_job.save(update_fields=update_fields)
status_changed = True
if not is_done:
continue
has_failed, reason = dag.has_workflow_failed()
logger.debug('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
result.append(workflow_job.id)
new_status = 'failed' if has_failed else 'successful'
logger.debug("Transitioning {} to {} status.".format(workflow_job.log_format, new_status))
update_fields = ['status', 'start_args']
workflow_job.status = new_status
if reason:
logger.info(f'Workflow job {workflow_job.id} failed due to reason: {reason}')
workflow_job.job_explanation = gettext_noop("No error handling paths found, marking workflow as failed")
update_fields.append('job_explanation')
workflow_job.start_args = '' # blank field to remove encrypted passwords
workflow_job.save(update_fields=update_fields)
status_changed = True
if status_changed:
if workflow_job.spawned_by_workflow:
ScheduleWorkflowManager().schedule()
schedule_task_manager()
workflow_job.websocket_emit_status(workflow_job.status)
# Operations whose queries rely on modifications made during the atomic scheduling session
workflow_job.send_notification_templates('succeeded' if workflow_job.status == 'successful' else 'failed')
if workflow_job.status == 'running':
spawn_nodes = dag.bfs_nodes_to_run()
if spawn_nodes:
logger.debug('Spawning jobs for %s', workflow_job.log_format)
else:
logger.debug('No nodes to spawn for %s', workflow_job.log_format)
for spawn_node in spawn_nodes:
if spawn_node.unified_job_template is None:
continue
kv = spawn_node.get_job_kwargs()
job = spawn_node.unified_job_template.create_unified_job(**kv)
spawn_node.job = job
spawn_node.save()
logger.debug('Spawned %s in %s for node %s', job.log_format, workflow_job.log_format, spawn_node.pk)
can_start = True
if isinstance(spawn_node.unified_job_template, WorkflowJobTemplate):
workflow_ancestors = job.get_ancestor_workflows()
if spawn_node.unified_job_template in set(workflow_ancestors):
can_start = False
logger.info(
'Refusing to start recursive workflow-in-workflow id={}, wfjt={}, ancestors={}'.format(
job.id, spawn_node.unified_job_template.pk, [wa.pk for wa in workflow_ancestors]
)
)
display_list = [spawn_node.unified_job_template] + workflow_ancestors
job.job_explanation = gettext_noop(
"Workflow Job spawned from workflow could not start because it "
"would result in recursion (spawn order, most recent first: {})"
).format(', '.join('<{}>'.format(tmp) for tmp in display_list))
else:
logger.debug(
'Starting workflow-in-workflow id={}, wfjt={}, ancestors={}'.format(
job.id, spawn_node.unified_job_template.pk, [wa.pk for wa in workflow_ancestors]
)
)
if not job._resources_sufficient_for_launch():
can_start = False
job.job_explanation = gettext_noop(
"Job spawned from workflow could not start because it was missing a related resource such as project or inventory"
)
if can_start:
if workflow_job.start_args:
start_args = json.loads(decrypt_field(workflow_job, 'start_args'))
else:
start_args = {}
can_start = job.signal_start(**start_args)
if not can_start:
job.job_explanation = gettext_noop(
"Job spawned from workflow could not start because it was not in the right state or required manual credentials"
)
if not can_start:
job.status = 'failed'
job.save(update_fields=['status', 'job_explanation'])
job.websocket_emit_status('failed')
# TODO: should we emit a status on the socket here similar to tasks.py awx_periodic_scheduler() ?
# emit_websocket_notification('/socket.io/jobs', '', dict(id=))
return result
@timeit
def get_tasks(self, filter_args):
self.all_tasks = [wf for wf in WorkflowJob.objects.filter(**filter_args)]
def start_task(self, task, instance_group, dependent_tasks=None, instance=None):
self.subsystem_metrics.inc("task_manager_tasks_started", 1)
self.start_task_limit -= 1
if self.start_task_limit == 0:
# schedule another run immediately after this task manager
schedule_task_manager()
from awx.main.tasks.system import handle_work_error, handle_work_success
dependent_tasks = dependent_tasks or []
task_actual = {
'type': get_type_for_model(type(task)),
'id': task.id,
}
dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]
task.status = 'waiting'
(start_status, opts) = task.pre_start()
if not start_status:
task.status = 'failed'
if task.job_explanation:
task.job_explanation += ' '
task.job_explanation += 'Task failed pre-start check.'
task.save()
# TODO: run error handler to fail sub-tasks and send notifications
else:
if type(task) is WorkflowJob:
task.status = 'running'
task.send_notification_templates('running')
logger.debug('Transitioning %s to running status.', task.log_format)
schedule_task_manager()
# at this point we already have control/execution nodes selected for the following cases
else:
task.instance_group = instance_group
execution_node_msg = f' and execution node {task.execution_node}' if task.execution_node else ''
logger.debug(
f'Submitting job {task.log_format} controlled by {task.controller_node} to instance group {instance_group.name}{execution_node_msg}.'
)
with disable_activity_stream():
task.celery_task_id = str(uuid.uuid4())
task.save()
task.log_lifecycle("waiting")
def post_commit():
if task.status != 'failed' and type(task) is not WorkflowJob:
# Before task is dispatched, ensure that job_event partitions exist
create_partition(task.event_class._meta.db_table, start=task.created)
task_cls = task._get_task_class()
task_cls.apply_async(
[task.pk],
opts,
queue=task.get_queue_name(),
uuid=task.celery_task_id,
callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
)
task.websocket_emit_status(task.status) # adds to on_commit
connection.on_commit(post_commit)
@timeit
def _schedule(self):
self.get_tasks(dict(status__in=["running"], dependencies_processed=True))
if len(self.all_tasks) > 0:
self.spawn_workflow_graph_jobs()
class DependencyManager(TaskBase):
def __init__(self):
super().__init__(prefix="dependency_manager")
def process_running_tasks(self, running_tasks):
for task in running_tasks:
self.dependency_graph.add_job(task)
def create_project_update(self, task, project_id=None):
if project_id is None:
@@ -286,20 +341,14 @@ class DependencyManager(TaskBase):
inventory_task.status = 'pending'
inventory_task.save()
logger.debug('Spawned {} as dependency of {}'.format(inventory_task.log_format, task.log_format))
# inventory_sources = self.get_inventory_source_tasks([task])
# self.process_inventory_sources(inventory_sources)
return inventory_task
def add_dependencies(self, task, dependencies):
with disable_activity_stream():
task.dependent_jobs.add(*dependencies)
def get_inventory_source_tasks(self):
inventory_ids = set()
for task in self.all_tasks:
if isinstance(task, Job):
inventory_ids.add(task.inventory_id)
self.all_inventory_sources = [invsrc for invsrc in InventorySource.objects.filter(inventory_id__in=inventory_ids, update_on_launch=True)]
def get_latest_inventory_update(self, inventory_source):
latest_inventory_update = InventoryUpdate.objects.filter(inventory_source=inventory_source).order_by("-created")
if not latest_inventory_update.exists():
@@ -432,167 +481,16 @@ class DependencyManager(TaskBase):
return created_dependencies
def process_tasks(self):
deps = self.generate_dependencies(self.all_tasks)
self.generate_dependencies(deps)
self.subsystem_metrics.inc(f"{self.prefix}_pending_processed", len(self.all_tasks) + len(deps))
@timeit
def _schedule(self):
self.get_tasks(dict(status__in=["pending"], dependencies_processed=False))
if len(self.all_tasks) > 0:
self.get_inventory_source_tasks()
self.process_tasks()
ScheduleTaskManager().schedule()
class TaskManager(TaskBase):
def __init__(self):
    """
    Do NOT put database queries or other potentially expensive operations
    in the task manager init. The task manager object is created every time a
    job is created, transitions state, and every 30 seconds on each tower node.
    More often than not, the object is destroyed quickly because the NOOP case is hit.

    The NOOP case is short-circuit logic. If the task manager realizes that another instance
    of the task manager is already running, then it short-circuits and decides not to run.
    """
    # The start task limit indicates how many pending jobs can be started on this
    # .schedule() run. Starting jobs is expensive, and there is code in place to reap
    # the task manager after 5 minutes. At scale, the task manager can easily take more than
    # 5 minutes to start pending jobs. If this limit is reached, pending jobs
    # will no longer be started and will be started on the next task manager cycle.
    self.time_delta_job_explanation = timedelta(seconds=30)
    super().__init__(prefix="task_manager")
def after_lock_init(self):
    """
    Second-stage init, run only AFTER the lock is acquired, i.e. once we know
    this instance of the task manager is really going to run.

    Creates the dependency graph, the instance and instance-group helpers
    built from ``self.all_tasks``, and stores the control plane instance group.
    """
    self.dependency_graph = DependencyGraph()
    self.instances = instances = TaskManagerInstances(self.all_tasks)
    self.instance_groups = groups = TaskManagerInstanceGroups(instances_by_hostname=instances)
    self.controlplane_ig = groups.controlplane_ig
def job_blocked_by(self, task):
    """Return the job that prevents ``task`` from starting, or None.

    The dependency graph is asked first. If it reports nothing, the task's
    dependent jobs are examined in order: an active one blocks the task; a
    failed or errored one makes this method mark ``task`` as failed (saved and
    announced over the websocket) and is returned as the blocker.
    """
    # TODO: I'm not happy with this, I think blocking behavior should be decided outside of the dependency graph
    # in the old task manager this was handled as a method on each task object outside of the graph and
    # probably has the side effect of cutting down *a lot* of the logic from this task manager class
    graph_blocker = self.dependency_graph.task_blocked_by(task)
    if graph_blocker:
        return graph_blocker

    for dep in task.dependent_jobs.all():
        if dep.status in ACTIVE_STATES:
            return dep
        if dep.status not in ("error", "failed"):
            continue
        # A failed or errored dependency fails this task right away. The
        # errback on the dependency takes some time to trigger, and the task
        # must not enter running state in the meantime.
        task.status = 'failed'
        explanation_values = (get_type_for_model(type(dep)), dep.name, dep.id)
        task.job_explanation = 'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % explanation_values
        task.save(update_fields=['status', 'job_explanation'])
        task.websocket_emit_status('failed')
        return dep

    return None
@timeit
def start_task(self, task, instance_group, dependent_tasks=None, instance=None):
self.dependency_graph.add_job(task)
self.subsystem_metrics.inc(f"{self.prefix}_tasks_started", 1)
self.start_task_limit -= 1
if self.start_task_limit == 0:
# schedule another run immediately after this task manager
ScheduleTaskManager().schedule()
from awx.main.tasks.system import handle_work_error, handle_work_success
# update capacity for control node and execution node
if task.controller_node:
self.instances[task.controller_node].consume_capacity(settings.AWX_CONTROL_NODE_TASK_IMPACT)
if task.execution_node:
self.instances[task.execution_node].consume_capacity(task.task_impact)
dependent_tasks = dependent_tasks or []
task_actual = {
'type': get_type_for_model(type(task)),
'id': task.id,
}
dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]
task.status = 'waiting'
(start_status, opts) = task.pre_start()
if not start_status:
task.status = 'failed'
if task.job_explanation:
task.job_explanation += ' '
task.job_explanation += 'Task failed pre-start check.'
task.save()
# TODO: run error handler to fail sub-tasks and send notifications
else:
if type(task) is WorkflowJob:
task.status = 'running'
task.send_notification_templates('running')
logger.debug('Transitioning %s to running status.', task.log_format)
# Call this to ensure Workflow nodes get spawned in timely manner
ScheduleWorkflowManager().schedule()
# at this point we already have control/execution nodes selected for the following cases
else:
task.instance_group = instance_group
execution_node_msg = f' and execution node {task.execution_node}' if task.execution_node else ''
logger.debug(
f'Submitting job {task.log_format} controlled by {task.controller_node} to instance group {instance_group.name}{execution_node_msg}.'
)
with disable_activity_stream():
task.celery_task_id = str(uuid.uuid4())
task.save()
task.log_lifecycle("waiting")
# apply_async does a NOTIFY to the channel dispatcher is listening to
# postgres will treat this as part of the transaction, which is what we want
if task.status != 'failed' and type(task) is not WorkflowJob:
task_cls = task._get_task_class()
task_cls.apply_async(
[task.pk],
opts,
queue=task.get_queue_name(),
uuid=task.celery_task_id,
callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
)
# In exception cases, like a job failing pre-start checks, we send the websocket status message
# for jobs going into waiting, we omit this because of performance issues, as it should go to running quickly
if task.status != 'waiting':
task.websocket_emit_status(task.status) # adds to on_commit
@timeit
def process_running_tasks(self, running_tasks):
for task in running_tasks:
if type(task) is WorkflowJob:
ScheduleWorkflowManager().schedule()
self.dependency_graph.add_job(task)
@timeit
def process_pending_tasks(self, pending_tasks):
running_workflow_templates = {wf.unified_job_template_id for wf in self.get_running_workflow_jobs()}
tasks_to_update_job_explanation = []
for task in pending_tasks:
if self.start_task_limit <= 0:
break
if self.timed_out():
logger.warning("Task manager has reached time out while processing pending jobs, exiting loop early")
break
blocked_by = self.job_blocked_by(task)
if blocked_by:
self.subsystem_metrics.inc(f"{self.prefix}_tasks_blocked", 1)
self.subsystem_metrics.inc("task_manager_tasks_blocked", 1)
task.log_lifecycle("blocked", blocked_by=blocked_by)
job_explanation = gettext_noop(f"waiting for {blocked_by._meta.model_name}-{blocked_by.id} to finish")
if task.job_explanation != job_explanation:
@@ -601,14 +499,19 @@ class TaskManager(TaskBase):
tasks_to_update_job_explanation.append(task)
continue
found_acceptable_queue = False
preferred_instance_groups = task.preferred_instance_groups
if isinstance(task, WorkflowJob):
# Previously we were tracking allow_simultaneous blocking both here and in DependencyGraph.
# Double check that using just the DependencyGraph works for Workflows and Sliced Jobs.
if task.unified_job_template_id in running_workflow_templates:
if not task.allow_simultaneous:
logger.debug("{} is blocked from running, workflow already running".format(task.log_format))
continue
else:
running_workflow_templates.add(task.unified_job_template_id)
self.start_task(task, None, task.get_jobs_fail_chain(), None)
continue
found_acceptable_queue = False
# Determine if there is control capacity for the task
if task.capacity_type == 'control':
control_impact = task.task_impact + settings.AWX_CONTROL_NODE_TASK_IMPACT
@@ -627,6 +530,8 @@ class TaskManager(TaskBase):
# All task.capacity_type == 'control' jobs should run on control plane, no need to loop over instance groups
if task.capacity_type == 'control':
task.execution_node = control_instance.hostname
control_instance.consume_capacity(control_impact)
self.dependency_graph.add_job(task)
execution_instance = self.instances[control_instance.hostname].obj
task.log_lifecycle("controller_node_chosen")
task.log_lifecycle("execution_node_chosen")
@@ -634,12 +539,17 @@ class TaskManager(TaskBase):
found_acceptable_queue = True
continue
for instance_group in self.instance_groups.get_instance_groups_from_task_cache(task):
for instance_group in preferred_instance_groups:
if instance_group.is_container_group:
self.dependency_graph.add_job(task)
self.start_task(task, instance_group, task.get_jobs_fail_chain(), None)
found_acceptable_queue = True
break
# TODO: remove this after we have confidence that OCP control nodes are reporting node_type=control
if settings.IS_K8S and task.capacity_type == 'execution':
logger.debug("Skipping group {}, task cannot run on control plane".format(instance_group.name))
continue
# at this point we know the instance group is NOT a container group
# because if it was, it would have started the task and broke out of the loop.
execution_instance = self.instance_groups.fit_task_to_most_remaining_capacity_instance(
@@ -653,7 +563,9 @@ class TaskManager(TaskBase):
control_instance = execution_instance
task.controller_node = execution_instance.hostname
control_instance.consume_capacity(settings.AWX_CONTROL_NODE_TASK_IMPACT)
task.log_lifecycle("controller_node_chosen")
execution_instance.consume_capacity(task.task_impact)
task.log_lifecycle("execution_node_chosen")
logger.debug(
"Starting {} in group {} instance {} (remaining_capacity={})".format(
@@ -661,6 +573,7 @@ class TaskManager(TaskBase):
)
)
execution_instance = self.instances[execution_instance.hostname].obj
self.dependency_graph.add_job(task)
self.start_task(task, instance_group, task.get_jobs_fail_chain(), execution_instance)
found_acceptable_queue = True
break
@@ -686,6 +599,25 @@ class TaskManager(TaskBase):
tasks_to_update_job_explanation.append(task)
logger.debug("{} couldn't be scheduled on graph, waiting for next cycle".format(task.log_format))
def timeout_approval_node(self):
    """Fail every pending workflow approval whose timeout has elapsed.

    For each expired approval this logs a warning, marks it timed out and
    failed, sends the 'timed_out' approval notification, emits the status
    over the websocket and saves the changed fields.
    """
    pending_approvals = WorkflowApproval.objects.filter(status='pending')
    now = tz_now()
    for approval in pending_approvals:
        allowed = timedelta(seconds=approval.timeout)
        # approvals with a timeout of 0 are never expired by this loop
        if approval.timeout == 0:
            continue
        if (now - approval.created) < allowed:
            continue
        message = _("The approval node {name} ({pk}) has expired after {timeout} seconds.").format(
            name=approval.name, pk=approval.pk, timeout=approval.timeout
        )
        logger.warning(message)
        approval.timed_out = True
        approval.status = 'failed'
        # notification and websocket emit happen before the row is saved
        approval.send_approval_notification('timed_out')
        approval.websocket_emit_status(approval.status)
        approval.job_explanation = message
        approval.save(update_fields=['status', 'job_explanation', 'timed_out'])
def reap_jobs_from_orphaned_instances(self):
# discover jobs that are in running state but aren't on an execution node
# that we know about; this is a fairly rare event, but it can occur if you,
@@ -698,45 +630,92 @@ class TaskManager(TaskBase):
logger.error(f'{j.execution_node} is not a registered instance; reaping {j.log_format}')
reap_job(j, 'failed')
def process_tasks(self):
running_tasks = [t for t in self.all_tasks if t.status in ['waiting', 'running']]
def process_tasks(self, all_sorted_tasks):
running_tasks = [t for t in all_sorted_tasks if t.status in ['waiting', 'running']]
self.process_running_tasks(running_tasks)
self.subsystem_metrics.inc(f"{self.prefix}_running_processed", len(running_tasks))
self.subsystem_metrics.inc("task_manager_running_processed", len(running_tasks))
pending_tasks = [t for t in self.all_tasks if t.status == 'pending']
pending_tasks = [t for t in all_sorted_tasks if t.status == 'pending']
undeped_tasks = [t for t in pending_tasks if not t.dependencies_processed]
dependencies = self.generate_dependencies(undeped_tasks)
deps_of_deps = self.generate_dependencies(dependencies)
dependencies += deps_of_deps
self.process_pending_tasks(dependencies)
self.subsystem_metrics.inc("task_manager_pending_processed", len(dependencies))
self.process_pending_tasks(pending_tasks)
self.subsystem_metrics.inc(f"{self.prefix}_pending_processed", len(pending_tasks))
def timeout_approval_node(self, task):
    """Mark a single workflow approval as timed out and failed.

    :param task: the approval to expire; its ``name``, ``pk`` and ``timeout``
        are used to build the explanation message.

    Does nothing (beyond logging a warning) when ``self.timed_out()`` reports
    that the manager itself has run out of time.
    """
    if self.timed_out():
        logger.warning("Task manager has reached time out while processing approval nodes, exiting loop early")
        # Do not process any more workflow approval nodes. Stop here.
        # Maybe we should schedule another TaskManager run
        return
    timeout_message = _("The approval node {name} ({pk}) has expired after {timeout} seconds.").format(name=task.name, pk=task.pk, timeout=task.timeout)
    logger.warning(timeout_message)
    task.timed_out = True
    task.status = 'failed'
    # Notification and websocket emit are issued before the row is saved.
    task.send_approval_notification('timed_out')
    task.websocket_emit_status(task.status)
    task.job_explanation = timeout_message
    # Only the three fields changed above are written back.
    task.save(update_fields=['status', 'job_explanation', 'timed_out'])
def get_expired_workflow_approvals(self):
    """Return the pending workflow approvals whose ``expires`` time has passed."""
    pending = WorkflowApproval.objects.filter(status='pending')
    # timeout of 0 indicates that it never expires
    expiring = pending.exclude(timeout=0)
    return expiring.filter(expires__lt=tz_now())
self.subsystem_metrics.inc("task_manager_pending_processed", len(pending_tasks))
@timeit
def _schedule(self):
self.get_tasks(dict(status__in=["pending", "waiting", "running"], dependencies_processed=True))
finished_wfjs = []
all_sorted_tasks = self.get_tasks()
self.after_lock_init()
self.reap_jobs_from_orphaned_instances()
self.after_lock_init(all_sorted_tasks)
if len(self.all_tasks) > 0:
self.process_tasks()
if len(all_sorted_tasks) > 0:
# TODO: Deal with
# latest_project_updates = self.get_latest_project_update_tasks(all_sorted_tasks)
# self.process_latest_project_updates(latest_project_updates)
for workflow_approval in self.get_expired_workflow_approvals():
self.timeout_approval_node(workflow_approval)
# latest_inventory_updates = self.get_latest_inventory_update_tasks(all_sorted_tasks)
# self.process_latest_inventory_updates(latest_inventory_updates)
self.all_inventory_sources = self.get_inventory_source_tasks(all_sorted_tasks)
running_workflow_tasks = self.get_running_workflow_jobs()
finished_wfjs = self.process_finished_workflow_jobs(running_workflow_tasks)
previously_running_workflow_tasks = running_workflow_tasks
running_workflow_tasks = []
for workflow_job in previously_running_workflow_tasks:
if workflow_job.status == 'running':
running_workflow_tasks.append(workflow_job)
else:
logger.debug('Removed %s from job spawning consideration.', workflow_job.log_format)
self.spawn_workflow_graph_jobs(running_workflow_tasks)
self.timeout_approval_node()
self.reap_jobs_from_orphaned_instances()
self.process_tasks(all_sorted_tasks)
return finished_wfjs
def record_aggregate_metrics(self, *args):
    """Count this scheduler call and, at most once per interval, flush metrics.

    ``*args`` is accepted and ignored so the method can also be invoked with
    extra positional arguments. Nothing is recorded when
    ``settings.IS_TESTING()`` is true.
    """
    if settings.IS_TESTING():
        return
    # increment task_manager_schedule_calls regardless if the other
    # metrics are recorded
    s_metrics.Metrics(auto_pipe_execute=True).inc("task_manager_schedule_calls", 1)
    # Only record metrics if the last time recording was more
    # than SUBSYSTEM_METRICS_TASK_MANAGER_RECORD_INTERVAL ago.
    # Prevents a short-duration task manager that runs directly after a
    # long task manager to override useful metrics.
    current_time = time.time()
    elapsed = current_time - self.subsystem_metrics.decode("task_manager_recorded_timestamp")
    if not (elapsed > settings.SUBSYSTEM_METRICS_TASK_MANAGER_RECORD_INTERVAL):
        logger.debug(f"skipping recording metrics, last recorded {elapsed} seconds ago")
        return
    logger.debug(f"recording metrics, last recorded {elapsed} seconds ago")
    self.subsystem_metrics.set("task_manager_recorded_timestamp", current_time)
    self.subsystem_metrics.pipe_execute()
def record_aggregate_metrics_and_exit(self, *args):
    """Record aggregate metrics, then terminate the process with exit status 1.

    ``*args`` is accepted and ignored; ``schedule`` installs this method as
    the SIGTERM handler.
    """
    self.record_aggregate_metrics()
    sys.exit(1)
def schedule(self):
    """Run one scheduling pass if the 'task_manager_lock' advisory lock is free.

    The lock is requested without waiting; when it is not acquired the method
    logs and returns without scheduling anything.
    """
    # Lock
    with advisory_lock('task_manager_lock', wait=False) as acquired:
        with transaction.atomic():
            if acquired is False:
                logger.debug("Not running scheduler, another task holds lock")
                return
            logger.debug("Starting Scheduler")
            with task_manager_bulk_reschedule():
                # if sigterm due to timeout, still record metrics
                signal.signal(signal.SIGTERM, self.record_aggregate_metrics_and_exit)
                self._schedule()
                self.record_aggregate_metrics()
            logger.debug("Finishing Scheduler")

View File

@@ -34,7 +34,7 @@ class TaskManagerInstance:
class TaskManagerInstances:
def __init__(self, active_tasks, instances=None, instance_fields=('node_type', 'capacity', 'hostname', 'enabled')):
def __init__(self, active_tasks, instances=None):
self.instances_by_hostname = dict()
if instances is None:
instances = (
@@ -69,7 +69,6 @@ class TaskManagerInstanceGroups:
def __init__(self, instances_by_hostname=None, instance_groups=None, instance_groups_queryset=None):
self.instance_groups = dict()
self.controlplane_ig = None
self.pk_ig_map = dict()
if instance_groups is not None: # for testing
self.instance_groups = instance_groups
@@ -84,7 +83,6 @@ class TaskManagerInstanceGroups:
instances_by_hostname[instance.hostname] for instance in instance_group.instances.all() if instance.hostname in instances_by_hostname
],
)
self.pk_ig_map[instance_group.pk] = instance_group
def get_remaining_capacity(self, group_name):
instances = self.instance_groups[group_name]['instances']
@@ -125,17 +123,3 @@ class TaskManagerInstanceGroups:
elif i.capacity > largest_instance.capacity:
largest_instance = i
return largest_instance
def get_instance_groups_from_task_cache(self, task):
    """Resolve a task's cached instance-group primary keys to instance groups.

    :param task: object exposing ``preferred_instance_groups_cache`` (an
        iterable of pks, possibly empty or None) and ``global_instance_groups``.
    :returns: the groups found in ``self.pk_ig_map`` in cache order, or
        ``task.global_instance_groups`` when none could be resolved.
    """
    igs = []
    for pk in task.preferred_instance_groups_cache or ():
        ig = self.pk_ig_map.get(pk)
        if ig:
            igs.append(ig)
        else:
            # logger.warn is a deprecated alias; logger.warning is the supported spelling
            logger.warning(f"Unknown instance group with pk {pk} for task {task}")
    if not igs:
        logger.warning(f"No instance groups in cache exist, defaulting to global instance groups for task {task}")
        return task.global_instance_groups
    return igs

View File

@@ -1,35 +1,15 @@
# Python
import logging
# Django
from django.conf import settings
# AWX
from awx import MODE
from awx.main.scheduler import TaskManager, DependencyManager, WorkflowManager
from awx.main.scheduler import TaskManager
from awx.main.dispatch.publish import task
from awx.main.dispatch import get_local_queuename
logger = logging.getLogger('awx.main.scheduler')
def run_manager(manager, prefix):
    """Instantiate ``manager`` and call its ``schedule()`` unless disabled.

    :param manager: zero-argument callable returning an object with ``schedule()``.
    :param prefix: short manager name, used only in the debug message.
    """
    # Managers are skipped only in development mode with the disable flag set.
    if MODE == 'development' and settings.AWX_DISABLE_TASK_MANAGERS:
        logger.debug(f"Not running {prefix} manager, AWX_DISABLE_TASK_MANAGERS is True. Trigger with GET to /api/debug/{prefix}_manager/")
        return
    manager().schedule()
@task(queue=get_local_queuename)
def task_manager():
    """Run the TaskManager through ``run_manager`` with the "task" prefix."""
    run_manager(TaskManager, "task")
@task(queue=get_local_queuename)
def dependency_manager():
    """Run the DependencyManager through ``run_manager`` with the "dependency" prefix."""
    run_manager(DependencyManager, "dependency")
@task(queue=get_local_queuename)
def workflow_manager():
    """Run the WorkflowManager through ``run_manager`` with the "workflow" prefix."""
    run_manager(WorkflowManager, "workflow")
def run_task_manager():
    """Log a debug message and run one ``TaskManager().schedule()`` pass."""
    logger.debug("Running task manager.")
    TaskManager().schedule()

View File

@@ -6,16 +6,17 @@ import os
import stat
# Django
from django.utils.timezone import now
from django.conf import settings
from django_guid import get_guid
from django.utils.functional import cached_property
from django.db import connections
# AWX
from awx.main.redact import UriCleaner
from awx.main.constants import MINIMAL_EVENTS, ANSIBLE_RUNNER_NEEDS_UPDATE_MESSAGE
from awx.main.utils.update_model import update_model
from awx.main.queue import CallbackQueueDispatcher
from awx.main.tasks.signals import signal_callback
logger = logging.getLogger('awx.main.tasks.callback')
@@ -174,6 +175,28 @@ class RunnerCallback:
return False
def cancel_callback(self):
    """
    Ansible runner callback to tell the job when/if it is canceled

    :returns: True when the run should stop (a signal was received, the job
        row could not be reloaded or no longer exists, or the job is flagged
        or marked as canceled); False otherwise.
    """
    unified_job_id = self.instance.pk
    if signal_callback():
        return True
    try:
        self.instance = self.update_model(unified_job_id)
    except Exception:
        # Any failure to reload the job is treated as a reason to cancel.
        logger.exception(f'Encountered error during cancel check for {unified_job_id}, canceling now')
        return True
    if not self.instance:
        logger.error('unified job {} was deleted while running, canceling'.format(unified_job_id))
        return True
    if self.instance.cancel_flag or self.instance.status == 'canceled':
        # total_seconds() covers the whole interval; the .seconds attribute
        # drops whole days and wraps to a large value for negative deltas.
        cancel_wait = int((now() - self.instance.modified).total_seconds()) if self.instance.modified else 0
        if cancel_wait > 5:
            logger.warning('Request to cancel {} took {} seconds to complete.'.format(self.instance.log_format, cancel_wait))
        return True
    return False
def finished_callback(self, runner_obj):
"""
Ansible runner callback triggered on finished run
@@ -204,8 +227,6 @@ class RunnerCallback:
with disable_activity_stream():
self.instance = self.update_model(self.instance.pk, job_args=json.dumps(runner_config.command), job_cwd=runner_config.cwd, job_env=job_env)
# We opened a connection just for that save, close it here now
connections.close_all()
elif status_data['status'] == 'failed':
# For encrypted ssh_key_data, ansible-runner worker will open and write the
# ssh_key_data to a named pipe. Then, once the podman container starts, ssh-agent will

View File

@@ -1,5 +1,6 @@
# Python
from collections import OrderedDict
from distutils.dir_util import copy_tree
import errno
import functools
import fcntl
@@ -14,6 +15,7 @@ import tempfile
import traceback
import time
import urllib.parse as urlparse
from uuid import uuid4
# Django
from django.conf import settings
@@ -36,7 +38,6 @@ from awx.main.constants import (
JOB_FOLDER_PREFIX,
MAX_ISOLATED_PATH_COLON_DELIMITER,
CONTAINER_VOLUMES_MOUNT_TYPES,
ACTIVE_STATES,
)
from awx.main.models import (
Instance,
@@ -145,7 +146,7 @@ class BaseTask(object):
"""
Return params structure to be executed by the container runtime
"""
if settings.IS_K8S and instance.instance_group.is_container_group:
if settings.IS_K8S:
return {}
image = instance.execution_environment.image
@@ -210,22 +211,14 @@ class BaseTask(object):
os.chmod(path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
if settings.AWX_CLEANUP_PATHS:
self.cleanup_paths.append(path)
# We will write files in these folders later
for subfolder in ('inventory', 'env'):
# Ansible runner requires that project exists,
# and we will write files in the other folders without pre-creating the folder
for subfolder in ('project', 'inventory', 'env'):
runner_subfolder = os.path.join(path, subfolder)
if not os.path.exists(runner_subfolder):
os.mkdir(runner_subfolder)
return path
def build_project_dir(self, instance, private_data_dir):
    """
    Create the ansible-runner project subdirectory. In many cases this is the source checkout.
    In cases that do not even need the source checkout, we create an empty dir to be the workdir.

    :param instance: unused by this base implementation.
    :param private_data_dir: existing directory in which ``project`` is created.
    """
    project_dir = os.path.join(private_data_dir, 'project')
    try:
        # Attempt the mkdir directly instead of checking first, so there is
        # no window between the existence check and the creation.
        os.mkdir(project_dir)
    except FileExistsError:
        # Already present: nothing to do.
        pass
def build_private_data_files(self, instance, private_data_dir):
"""
Creates temporary files containing the private data.
@@ -361,65 +354,12 @@ class BaseTask(object):
expect_passwords[k] = passwords.get(v, '') or ''
return expect_passwords
def release_lock(self, project):
    """Unlock and close the project lock file held in ``self.lock_fd``.

    :param project: used only for ``get_lock_file()`` in the error message.
    :raises IOError: re-raised when the unlock call fails.

    The descriptor is always closed and ``self.lock_fd`` reset to None, so a
    failed unlock never leaves a stale descriptor number behind.
    """
    try:
        fcntl.lockf(self.lock_fd, fcntl.LOCK_UN)
    except IOError as e:
        logger.error("I/O error({0}) while trying to release lock file [{1}]: {2}".format(e.errno, project.get_lock_file(), e.strerror))
        raise
    finally:
        # Forget the descriptor before closing so the attribute is cleared
        # even if close() itself raises.
        fd, self.lock_fd = self.lock_fd, None
        os.close(fd)
def acquire_lock(self, project, unified_job_id=None):
    """Take an exclusive lock on the project's lock file, polling once per second.

    :param project: provides ``get_lock_file()`` (and ``save()`` as a recovery
        step when no lock path is set).
    :param unified_job_id: used only in the informational log message.
    :raises RuntimeError: when no lock file path is available even after saving.
    :raises OSError: when the lock file cannot be opened.
    :raises IOError: when locking fails for a reason other than contention.

    NOTE(review): when the job is canceled while waiting, this returns without
    holding the lock and leaves ``self.lock_fd`` open; callers cannot tell the
    two outcomes apart from the return value.
    """
    if not os.path.exists(settings.PROJECTS_ROOT):
        os.mkdir(settings.PROJECTS_ROOT)
    lock_path = project.get_lock_file()
    if lock_path is None:
        # If from migration or someone blanked local_path for any other reason, recoverable by save
        project.save()
        lock_path = project.get_lock_file()
        if lock_path is None:
            raise RuntimeError(u'Invalid lock file path')
    try:
        self.lock_fd = os.open(lock_path, os.O_RDWR | os.O_CREAT)
    except OSError as e:
        logger.error("I/O error({0}) while trying to open lock file [{1}]: {2}".format(e.errno, lock_path, e.strerror))
        raise
    start_time = time.time()
    while True:
        try:
            # Non-blocking attempt; contention surfaces as EAGAIN/EACCES below.
            fcntl.lockf(self.lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
            break
        except IOError as e:
            if e.errno not in (errno.EAGAIN, errno.EACCES):
                os.close(self.lock_fd)
                logger.error("I/O error({0}) while trying to aquire lock on file [{1}]: {2}".format(e.errno, lock_path, e.strerror))
                raise
            else:
                time.sleep(1.0)
        # Only reached after a contended attempt: check for cancellation
        # before retrying.
        self.instance.refresh_from_db(fields=['cancel_flag'])
        if self.instance.cancel_flag or signal_callback():
            logger.debug(f"Unified job {self.instance.id} was canceled while waiting for project file lock")
            return
    waiting_time = time.time() - start_time
    if waiting_time > 1.0:
        logger.info(f'Job {unified_job_id} waited {waiting_time} to acquire lock for local source tree for path {lock_path}.')
def pre_run_hook(self, instance, private_data_dir):
    """
    Hook for any steps to run before the job/task starts

    :param instance: the job being started; its lifecycle log and event table
        are used here.
    :param private_data_dir: not used by this base implementation.
    """
    instance.log_lifecycle("pre_run")
    # Before task is started, ensure that job_event partitions exist
    create_partition(instance.event_class._meta.db_table, start=instance.created)
def post_run_hook(self, instance, status):
"""
Hook for any steps to run before job/task is marked as complete.
@@ -432,9 +372,15 @@ class BaseTask(object):
"""
instance.log_lifecycle("finalize_run")
artifact_dir = os.path.join(private_data_dir, 'artifacts', str(self.instance.id))
job_profiling_dir = os.path.join(artifact_dir, 'playbook_profiling')
awx_profiling_dir = '/var/log/tower/playbook_profiling/'
collections_info = os.path.join(artifact_dir, 'collections.json')
ansible_version_file = os.path.join(artifact_dir, 'ansible_version.txt')
if not os.path.exists(awx_profiling_dir):
os.mkdir(awx_profiling_dir)
if os.path.isdir(job_profiling_dir):
shutil.copytree(job_profiling_dir, os.path.join(awx_profiling_dir, str(instance.pk)))
if os.path.exists(collections_info):
with open(collections_info) as ee_json_info:
ee_collections_info = json.loads(ee_json_info.read())
@@ -453,11 +399,6 @@ class BaseTask(object):
Run the job/task and capture its output.
"""
self.instance = self.model.objects.get(pk=pk)
if self.instance.status != 'canceled' and self.instance.cancel_flag:
self.instance = self.update_model(self.instance.pk, start_args='', status='canceled')
if self.instance.status not in ACTIVE_STATES:
# Prevent starting the job if it has been reaped or handled by another process.
raise RuntimeError(f'Not starting {self.instance.status} task pk={pk} because {self.instance.status} is not a valid active state')
if self.instance.execution_environment_id is None:
from awx.main.signals import disable_activity_stream
@@ -483,11 +424,9 @@ class BaseTask(object):
self.instance.send_notification_templates("running")
private_data_dir = self.build_private_data_dir(self.instance)
self.pre_run_hook(self.instance, private_data_dir)
self.build_project_dir(self.instance, private_data_dir)
self.instance.log_lifecycle("preparing_playbook")
if self.instance.cancel_flag or signal_callback():
self.instance = self.update_model(self.instance.pk, status='canceled')
if self.instance.status != 'running':
# Stop the task chain and prevent starting the job if it has
# already been canceled.
@@ -590,7 +529,7 @@ class BaseTask(object):
event_handler=self.runner_callback.event_handler,
finished_callback=self.runner_callback.finished_callback,
status_handler=self.runner_callback.status_handler,
cancel_callback=signal_callback,
cancel_callback=self.runner_callback.cancel_callback,
**params,
)
else:
@@ -610,12 +549,8 @@ class BaseTask(object):
status = 'failed'
elif status == 'canceled':
self.instance = self.update_model(pk)
cancel_flag_value = getattr(self.instance, 'cancel_flag', False)
if (cancel_flag_value is False) and signal_callback():
self.runner_callback.delay_update(skip_if_already_set=True, job_explanation="Task was canceled due to receiving a shutdown signal.")
status = 'failed'
elif cancel_flag_value is False:
self.runner_callback.delay_update(skip_if_already_set=True, job_explanation="The running ansible process received a shutdown signal.")
if (getattr(self.instance, 'cancel_flag', False) is False) and signal_callback():
self.runner_callback.delay_update(job_explanation="Task was canceled due to receiving a shutdown signal.")
status = 'failed'
except ReceptorNodeNotFound as exc:
self.runner_callback.delay_update(job_explanation=str(exc))
@@ -658,141 +593,8 @@ class BaseTask(object):
raise AwxTaskError.TaskError(self.instance, rc)
class SourceControlMixin(BaseTask):
"""Utility methods for tasks that run use content from source control"""
def get_sync_needs(self, project, scm_branch=None):
    """Work out which project-sync steps are needed before using ``project``.

    :param project: the project whose local checkout and cache are inspected.
    :param scm_branch: optional branch; counts as an override when it is set
        and differs from ``project.scm_branch``.
    :returns: list of tags, drawn from ``'update_<scm_type>'``,
        ``'install_roles'`` and ``'install_collections'``; empty when no sync
        is needed.
    """
    project_path = project.get_project_path(check_if_exists=False)
    job_revision = project.scm_revision
    sync_needs = []
    source_update_tag = 'update_{}'.format(project.scm_type)
    branch_override = bool(scm_branch and scm_branch != project.scm_branch)
    # TODO: skip syncs for inventory updates. Now, UI needs a link added so clients can link to project
    # source_project is only a field on inventory sources.
    if isinstance(self.instance, InventoryUpdate):
        sync_needs.append(source_update_tag)
    elif not project.scm_type:
        pass  # manual projects are not synced, user has responsibility for that
    elif not os.path.exists(project_path):
        logger.debug(f'Performing fresh clone of {project.id} for unified job {self.instance.id} on this instance.')
        sync_needs.append(source_update_tag)
    elif project.scm_type == 'git' and project.scm_revision and (not branch_override):
        try:
            git_repo = git.Repo(project_path)
            # The update is skipped only when the local HEAD already is the wanted revision.
            if job_revision == git_repo.head.commit.hexsha:
                logger.debug(f'Skipping project sync for {self.instance.id} because commit is locally available')
            else:
                sync_needs.append(source_update_tag)
        except (ValueError, BadGitName, git.exc.InvalidGitRepositoryError):
            logger.debug(f'Needed commit for {self.instance.id} not in local source tree, will sync with remote')
            sync_needs.append(source_update_tag)
    else:
        logger.debug(f'Project not available locally, {self.instance.id} will sync with remote')
        sync_needs.append(source_update_tag)
    has_cache = os.path.exists(os.path.join(project.get_cache_path(), project.cache_id))
    # Galaxy requirements are not supported for manual projects
    if project.scm_type and ((not has_cache) or branch_override):
        sync_needs.extend(['install_roles', 'install_collections'])
    return sync_needs
def spawn_project_sync(self, project, sync_needs, scm_branch=None):
    """Create (but do not run) a 'sync' project update for ``project``.

    :param project: project to create the update from.
    :param sync_needs: list of tags, joined with commas into ``job_tags``.
    :param scm_branch: optional branch; applied when it differs from
        ``project.scm_branch``.
    :returns: the object returned by ``project.create_project_update``.
    """
    pu_ig = self.instance.instance_group
    pu_en = Instance.objects.me().hostname
    # The update is created already 'running' on this node and shares the
    # calling job's celery_task_id.
    sync_metafields = dict(
        launch_type="sync",
        job_type='run',
        job_tags=','.join(sync_needs),
        status='running',
        instance_group=pu_ig,
        execution_node=pu_en,
        controller_node=pu_en,
        celery_task_id=self.instance.celery_task_id,
    )
    if scm_branch and scm_branch != project.scm_branch:
        sync_metafields['scm_branch'] = scm_branch
        sync_metafields['scm_clean'] = True  # to accommodate force pushes
    # When no 'update_*' tag is requested, pin the project's current revision.
    if 'update_' not in sync_metafields['job_tags']:
        sync_metafields['scm_revision'] = project.scm_revision
    local_project_sync = project.create_project_update(_eager_fields=sync_metafields)
    local_project_sync.log_lifecycle("controller_node_chosen")
    local_project_sync.log_lifecycle("execution_node_chosen")
    return local_project_sync
def sync_and_copy_without_lock(self, project, private_data_dir, scm_branch=None):
sync_needs = self.get_sync_needs(project, scm_branch=scm_branch)
if sync_needs:
local_project_sync = self.spawn_project_sync(project, sync_needs, scm_branch=scm_branch)
# save the associated job before calling run() so that a
# cancel() call on the job can cancel the project update
if isinstance(self.instance, Job):
self.instance = self.update_model(self.instance.pk, project_update=local_project_sync)
else:
self.instance = self.update_model(self.instance.pk, source_project_update=local_project_sync)
try:
# the job private_data_dir is passed so sync can download roles and collections there
sync_task = RunProjectUpdate(job_private_data_dir=private_data_dir)
sync_task.run(local_project_sync.id)
local_project_sync.refresh_from_db()
self.instance = self.update_model(self.instance.pk, scm_revision=local_project_sync.scm_revision)
except Exception:
local_project_sync.refresh_from_db()
if local_project_sync.status != 'canceled':
self.instance = self.update_model(
self.instance.pk,
status='failed',
job_explanation=(
'Previous Task Failed: {"job_type": "project_update", '
f'"job_name": "{local_project_sync.name}", "job_id": "{local_project_sync.id}"}}'
),
)
raise
self.instance.refresh_from_db()
if self.instance.cancel_flag:
return
else:
# Case where a local sync is not needed, meaning that local tree is
# up-to-date with project, job is running project current version
self.instance = self.update_model(self.instance.pk, scm_revision=project.scm_revision)
# Project update does not copy the folder, so copy here
RunProjectUpdate.make_local_copy(project, private_data_dir)
def sync_and_copy(self, project, private_data_dir, scm_branch=None):
    """Run ``sync_and_copy_without_lock`` bracketed by acquire/release of the project lock.

    :param project: project to sync and copy.
    :param private_data_dir: passed through to ``sync_and_copy_without_lock``.
    :param scm_branch: optional branch override, passed through.
    :returns: whatever ``sync_and_copy_without_lock`` returns.

    For git projects with a branch override, the checkout that was active
    beforehand is restored before the lock is released.
    """
    self.acquire_lock(project, self.instance.id)
    try:
        original_branch = None
        project_path = project.get_project_path(check_if_exists=False)
        if project.scm_type == 'git' and (scm_branch and scm_branch != project.scm_branch):
            if os.path.exists(project_path):
                git_repo = git.Repo(project_path)
                # Remember either the detached commit or the named branch.
                if git_repo.head.is_detached:
                    original_branch = git_repo.head.commit
                else:
                    original_branch = git_repo.active_branch
        return self.sync_and_copy_without_lock(project, private_data_dir, scm_branch=scm_branch)
    finally:
        # We have made the copy so we can set the tree back to its normal state
        if original_branch:
            # for git project syncs, non-default branches can be problems
            # restore to branch the repo was on before this run
            try:
                original_branch.checkout()
            except Exception:
                # this could have failed due to dirty tree, but difficult to predict all cases
                logger.exception(f'Failed to restore project repo to prior state after {self.instance.id}')
        self.release_lock(project)
@task(queue=get_local_queuename)
class RunJob(SourceControlMixin, BaseTask):
class RunJob(BaseTask):
"""
Run a job using ansible-playbook.
"""
@@ -1061,14 +863,98 @@ class RunJob(SourceControlMixin, BaseTask):
job = self.update_model(job.pk, status='failed', job_explanation=msg)
raise RuntimeError(msg)
project_path = job.project.get_project_path(check_if_exists=False)
job_revision = job.project.scm_revision
sync_needs = []
source_update_tag = 'update_{}'.format(job.project.scm_type)
branch_override = bool(job.scm_branch and job.scm_branch != job.project.scm_branch)
if not job.project.scm_type:
pass # manual projects are not synced, user has responsibility for that
elif not os.path.exists(project_path):
logger.debug('Performing fresh clone of {} on this instance.'.format(job.project))
sync_needs.append(source_update_tag)
elif job.project.scm_type == 'git' and job.project.scm_revision and (not branch_override):
try:
git_repo = git.Repo(project_path)
if job_revision == git_repo.head.commit.hexsha:
logger.debug('Skipping project sync for {} because commit is locally available'.format(job.log_format))
else:
sync_needs.append(source_update_tag)
except (ValueError, BadGitName, git.exc.InvalidGitRepositoryError):
logger.debug('Needed commit for {} not in local source tree, will sync with remote'.format(job.log_format))
sync_needs.append(source_update_tag)
else:
logger.debug('Project not available locally, {} will sync with remote'.format(job.log_format))
sync_needs.append(source_update_tag)
has_cache = os.path.exists(os.path.join(job.project.get_cache_path(), job.project.cache_id))
# Galaxy requirements are not supported for manual projects
if job.project.scm_type and ((not has_cache) or branch_override):
sync_needs.extend(['install_roles', 'install_collections'])
if sync_needs:
pu_ig = job.instance_group
pu_en = Instance.objects.me().hostname
sync_metafields = dict(
launch_type="sync",
job_type='run',
job_tags=','.join(sync_needs),
status='running',
instance_group=pu_ig,
execution_node=pu_en,
controller_node=pu_en,
celery_task_id=job.celery_task_id,
)
if branch_override:
sync_metafields['scm_branch'] = job.scm_branch
sync_metafields['scm_clean'] = True # to accomidate force pushes
if 'update_' not in sync_metafields['job_tags']:
sync_metafields['scm_revision'] = job_revision
local_project_sync = job.project.create_project_update(_eager_fields=sync_metafields)
local_project_sync.log_lifecycle("controller_node_chosen")
local_project_sync.log_lifecycle("execution_node_chosen")
create_partition(local_project_sync.event_class._meta.db_table, start=local_project_sync.created)
# save the associated job before calling run() so that a
# cancel() call on the job can cancel the project update
job = self.update_model(job.pk, project_update=local_project_sync)
project_update_task = local_project_sync._get_task_class()
try:
# the job private_data_dir is passed so sync can download roles and collections there
sync_task = project_update_task(job_private_data_dir=private_data_dir)
sync_task.run(local_project_sync.id)
local_project_sync.refresh_from_db()
job = self.update_model(job.pk, scm_revision=local_project_sync.scm_revision)
except Exception:
local_project_sync.refresh_from_db()
if local_project_sync.status != 'canceled':
job = self.update_model(
job.pk,
status='failed',
job_explanation=(
'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}'
% ('project_update', local_project_sync.name, local_project_sync.id)
),
)
raise
job.refresh_from_db()
if job.cancel_flag:
return
else:
# Case where a local sync is not needed, meaning that local tree is
# up-to-date with project, job is running project current version
if job_revision:
job = self.update_model(job.pk, scm_revision=job_revision)
# Project update does not copy the folder, so copy here
RunProjectUpdate.make_local_copy(job.project, private_data_dir, scm_revision=job_revision)
if job.inventory.kind == 'smart':
# cache smart inventory memberships so that the host_filter query is not
# ran inside of the event saving code
update_smart_memberships_for_inventory(job.inventory)
def build_project_dir(self, job, private_data_dir):
    """Delegate to ``sync_and_copy`` with the job's project and ``scm_branch``."""
    self.sync_and_copy(job.project, private_data_dir, scm_branch=job.scm_branch)
def final_run_hook(self, job, status, private_data_dir, fact_modification_times):
super(RunJob, self).final_run_hook(job, status, private_data_dir, fact_modification_times)
if not private_data_dir:
@@ -1100,6 +986,7 @@ class RunProjectUpdate(BaseTask):
def __init__(self, *args, job_private_data_dir=None, **kwargs):
    """Initialise the task.

    :param job_private_data_dir: keyword-only; stored on the instance as
        ``self.job_private_data_dir`` (defaults to None).
    """
    super(RunProjectUpdate, self).__init__(*args, **kwargs)
    # Starts as None.
    self.original_branch = None
    self.job_private_data_dir = job_private_data_dir
def build_private_data(self, project_update, private_data_dir):
@@ -1269,10 +1156,6 @@ class RunProjectUpdate(BaseTask):
# for raw archive, prevent error moving files between volumes
extra_vars['ansible_remote_tmp'] = os.path.join(project_update.get_project_path(check_if_exists=False), '.ansible_awx', 'tmp')
if project_update.project.signature_validation_credential is not None:
pubkey = project_update.project.signature_validation_credential.get_input('gpg_public_key')
extra_vars['gpg_pubkey'] = pubkey
self._write_extra_vars_file(private_data_dir, extra_vars)
def build_playbook_path_relative_to_cwd(self, project_update, private_data_dir):
@@ -1290,13 +1173,74 @@ class RunProjectUpdate(BaseTask):
d[r'^Are you sure you want to continue connecting \(yes/no\)\?\s*?$'] = 'yes'
return d
def release_lock(self, instance):
    """Unlock and close the lock file held in ``self.lock_fd``.

    :param instance: used only for ``get_lock_file()`` in the error message.
    :raises IOError: re-raised when the unlock call fails.

    The descriptor is always closed and ``self.lock_fd`` reset to None, so a
    failed unlock never leaves a stale descriptor number behind.
    """
    try:
        fcntl.lockf(self.lock_fd, fcntl.LOCK_UN)
    except IOError as e:
        logger.error("I/O error({0}) while trying to release lock file [{1}]: {2}".format(e.errno, instance.get_lock_file(), e.strerror))
        raise
    finally:
        # Forget the descriptor before closing so the attribute is cleared
        # even if close() itself raises.
        fd, self.lock_fd = self.lock_fd, None
        os.close(fd)
'''
Note: We don't support blocking=False
'''
def acquire_lock(self, instance, blocking=True):
    """Open the instance's lock file and take an exclusive lock on it.

    The descriptor is stored on ``self.lock_fd``. The lock is requested in
    non-blocking mode and retried once per second while it is held elsewhere.
    ``instance.cancel_flag`` is re-read from the database before every attempt;
    as soon as it is set the method returns without the lock (the descriptor
    stays open on ``self.lock_fd``).

    :param instance: object providing ``get_lock_file()``, ``save()``,
        ``refresh_from_db()``, ``cancel_flag``, ``pk`` and ``log_format``.
    :param blocking: accepted but ignored; ``blocking=False`` is not supported.
    :raises RuntimeError: if no lock file path is available even after
        ``instance.save()``.
    :raises OSError: if the lock file cannot be opened.
    :raises IOError: if locking fails with an errno other than
        ``EAGAIN``/``EACCES``; the descriptor is closed first.
    """
    lock_path = instance.get_lock_file()
    if lock_path is None:
        # If from migration or someone blanked local_path for any other reason, recoverable by save
        instance.save()
        lock_path = instance.get_lock_file()
        if lock_path is None:
            raise RuntimeError(u'Invalid lock file path')

    try:
        self.lock_fd = os.open(lock_path, os.O_RDWR | os.O_CREAT)
    except OSError as e:
        logger.error("I/O error({0}) while trying to open lock file [{1}]: {2}".format(e.errno, lock_path, e.strerror))
        raise

    # monotonic clock: the measured wait must not be skewed by wall-clock adjustments
    start_time = time.monotonic()
    while True:
        try:
            instance.refresh_from_db(fields=['cancel_flag'])
            if instance.cancel_flag:
                logger.debug("ProjectUpdate({0}) was canceled".format(instance.pk))
                return
            fcntl.lockf(self.lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
            break
        except IOError as e:
            if e.errno not in (errno.EAGAIN, errno.EACCES):
                os.close(self.lock_fd)
                logger.error("I/O error({0}) while trying to aquire lock on file [{1}]: {2}".format(e.errno, lock_path, e.strerror))
                raise
            # lock is held by someone else; poll again shortly
            time.sleep(1.0)
    waiting_time = time.monotonic() - start_time

    if waiting_time > 1.0:
        logger.info('{} spent {} waiting to acquire lock for local source tree for path {}.'.format(instance.log_format, waiting_time, lock_path))
def pre_run_hook(self, instance, private_data_dir):
super(RunProjectUpdate, self).pre_run_hook(instance, private_data_dir)
# re-create root project folder if a natural disaster has destroyed it
if not os.path.exists(settings.PROJECTS_ROOT):
os.mkdir(settings.PROJECTS_ROOT)
project_path = instance.project.get_project_path(check_if_exists=False)
if instance.launch_type != 'sync':
self.acquire_lock(instance.project, instance.id)
self.acquire_lock(instance)
self.original_branch = None
if instance.scm_type == 'git' and instance.branch_override:
if os.path.exists(project_path):
git_repo = git.Repo(project_path)
if git_repo.head.is_detached:
self.original_branch = git_repo.head.commit
else:
self.original_branch = git_repo.active_branch
if not os.path.exists(project_path):
os.makedirs(project_path) # used as container mount
@@ -1307,12 +1251,11 @@ class RunProjectUpdate(BaseTask):
shutil.rmtree(stage_path)
os.makedirs(stage_path) # presence of empty cache indicates lack of roles or collections
def build_project_dir(self, instance, private_data_dir):
# the project update playbook is not in a git repo, but uses a vendoring directory
# to be consistent with the ansible-runner model,
# that is moved into the runner project folder here
awx_playbooks = self.get_path_to('../../', 'playbooks')
shutil.copytree(awx_playbooks, os.path.join(private_data_dir, 'project'))
copy_tree(awx_playbooks, os.path.join(private_data_dir, 'project'))
@staticmethod
def clear_project_cache(cache_dir, keep_value):
@@ -1329,18 +1272,50 @@ class RunProjectUpdate(BaseTask):
logger.warning(f"Could not remove cache directory {old_path}")
@staticmethod
def make_local_copy(project, job_private_data_dir):
def make_local_copy(p, job_private_data_dir, scm_revision=None):
"""Copy project content (roles and collections) to a job private_data_dir
:param object project: Either a project or a project update
:param object p: Either a project or a project update
:param str job_private_data_dir: The root of the target ansible-runner folder
:param str scm_revision: For branch_override cases, the git revision to copy
"""
project_path = project.get_project_path(check_if_exists=False)
project_path = p.get_project_path(check_if_exists=False)
destination_folder = os.path.join(job_private_data_dir, 'project')
shutil.copytree(project_path, destination_folder, ignore=shutil.ignore_patterns('.git'), symlinks=True)
if not scm_revision:
scm_revision = p.scm_revision
if p.scm_type == 'git':
git_repo = git.Repo(project_path)
if not os.path.exists(destination_folder):
os.mkdir(destination_folder, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)
tmp_branch_name = 'awx_internal/{}'.format(uuid4())
# always clone based on specific job revision
if not p.scm_revision:
raise RuntimeError('Unexpectedly could not determine a revision to run from project.')
source_branch = git_repo.create_head(tmp_branch_name, p.scm_revision)
# git clone must take file:// syntax for source repo or else options like depth will be ignored
source_as_uri = Path(project_path).as_uri()
git.Repo.clone_from(
source_as_uri,
destination_folder,
branch=source_branch,
depth=1,
single_branch=True, # shallow, do not copy full history
)
# submodules copied in loop because shallow copies from local HEADs are ideal
# and no git clone submodule options are compatible with minimum requirements
for submodule in git_repo.submodules:
subrepo_path = os.path.abspath(os.path.join(project_path, submodule.path))
subrepo_destination_folder = os.path.abspath(os.path.join(destination_folder, submodule.path))
subrepo_uri = Path(subrepo_path).as_uri()
git.Repo.clone_from(subrepo_uri, subrepo_destination_folder, depth=1, single_branch=True)
# force option is necessary because remote refs are not counted, although no information is lost
git_repo.delete_head(tmp_branch_name, force=True)
else:
copy_tree(project_path, destination_folder, preserve_symlinks=1)
# copy over the roles and collection cache to job folder
cache_path = os.path.join(project.get_cache_path(), project.cache_id)
cache_path = os.path.join(p.get_cache_path(), p.cache_id)
subfolders = []
if settings.AWX_COLLECTIONS_ENABLED:
subfolders.append('requirements_collections')
@@ -1350,8 +1325,8 @@ class RunProjectUpdate(BaseTask):
cache_subpath = os.path.join(cache_path, subfolder)
if os.path.exists(cache_subpath):
dest_subpath = os.path.join(job_private_data_dir, subfolder)
shutil.copytree(cache_subpath, dest_subpath, symlinks=True)
logger.debug('{0} {1} prepared {2} from cache'.format(type(project).__name__, project.pk, dest_subpath))
copy_tree(cache_subpath, dest_subpath, preserve_symlinks=1)
logger.debug('{0} {1} prepared {2} from cache'.format(type(p).__name__, p.pk, dest_subpath))
def post_run_hook(self, instance, status):
super(RunProjectUpdate, self).post_run_hook(instance, status)
@@ -1381,13 +1356,23 @@ class RunProjectUpdate(BaseTask):
if self.job_private_data_dir:
if status == 'successful':
# copy project folder before resetting to default branch
# because some git-tree-specific resources (like submodules) might matter
self.make_local_copy(instance, self.job_private_data_dir)
if self.original_branch:
# for git project syncs, non-default branches can be problems
# restore to branch the repo was on before this run
try:
self.original_branch.checkout()
except Exception:
# this could have failed due to dirty tree, but difficult to predict all cases
logger.exception('Failed to restore project repo to prior state after {}'.format(instance.log_format))
finally:
if instance.launch_type != 'sync':
self.release_lock(instance.project)
self.release_lock(instance)
p = instance.project
if instance.job_type == 'check' and status not in ('failed', 'canceled'):
if instance.job_type == 'check' and status not in (
'failed',
'canceled',
):
if self.runner_callback.playbook_new_revision:
p.scm_revision = self.runner_callback.playbook_new_revision
else:
@@ -1415,7 +1400,7 @@ class RunProjectUpdate(BaseTask):
@task(queue=get_local_queuename)
class RunInventoryUpdate(SourceControlMixin, BaseTask):
class RunInventoryUpdate(BaseTask):
model = InventoryUpdate
event_model = InventoryUpdateEvent
@@ -1571,18 +1556,54 @@ class RunInventoryUpdate(SourceControlMixin, BaseTask):
# All credentials not used by inventory source injector
return inventory_update.get_extra_credentials()
def build_project_dir(self, inventory_update, private_data_dir):
def pre_run_hook(self, inventory_update, private_data_dir):
super(RunInventoryUpdate, self).pre_run_hook(inventory_update, private_data_dir)
source_project = None
if inventory_update.inventory_source:
source_project = inventory_update.inventory_source.source_project
if inventory_update.source == 'scm' and source_project and source_project.scm_type: # never ever update manual projects
if inventory_update.source == 'scm':
if not source_project:
raise RuntimeError('Could not find project to run SCM inventory update from.')
self.sync_and_copy(source_project, private_data_dir)
else:
# If source is not SCM make an empty project directory, content is built inside inventory folder
super(RunInventoryUpdate, self).build_project_dir(inventory_update, private_data_dir)
# Check if the content cache exists, so that we do not unnecessarily re-download roles
sync_needs = ['update_{}'.format(source_project.scm_type)]
has_cache = os.path.exists(os.path.join(source_project.get_cache_path(), source_project.cache_id))
# Galaxy requirements are not supported for manual projects
if not has_cache:
sync_needs.extend(['install_roles', 'install_collections'])
local_project_sync = source_project.create_project_update(
_eager_fields=dict(
launch_type="sync",
job_type='run',
job_tags=','.join(sync_needs),
status='running',
execution_node=Instance.objects.me().hostname,
controller_node=Instance.objects.me().hostname,
instance_group=inventory_update.instance_group,
celery_task_id=inventory_update.celery_task_id,
)
)
local_project_sync.log_lifecycle("controller_node_chosen")
local_project_sync.log_lifecycle("execution_node_chosen")
create_partition(local_project_sync.event_class._meta.db_table, start=local_project_sync.created)
# associate the inventory update before calling run() so that a
# cancel() call on the inventory update can cancel the project update
local_project_sync.scm_inventory_updates.add(inventory_update)
project_update_task = local_project_sync._get_task_class()
try:
sync_task = project_update_task(job_private_data_dir=private_data_dir)
sync_task.run(local_project_sync.id)
local_project_sync.refresh_from_db()
except Exception:
inventory_update = self.update_model(
inventory_update.pk,
status='failed',
job_explanation=(
'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}'
% ('project_update', local_project_sync.name, local_project_sync.id)
),
)
raise
def post_run_hook(self, inventory_update, status):
super(RunInventoryUpdate, self).post_run_hook(inventory_update, status)
@@ -1625,7 +1646,7 @@ class RunInventoryUpdate(SourceControlMixin, BaseTask):
handler = SpecialInventoryHandler(
self.runner_callback.event_handler,
signal_callback,
self.runner_callback.cancel_callback,
verbosity=inventory_update.verbosity,
job_timeout=self.get_instance_timeout(self.instance),
start_time=inventory_update.started,

View File

@@ -12,7 +12,6 @@ import yaml
# Django
from django.conf import settings
from django.db import connections
# Runner
import ansible_runner
@@ -26,19 +25,12 @@ from awx.main.utils.common import (
cleanup_new_process,
)
from awx.main.constants import MAX_ISOLATED_PATH_COLON_DELIMITER
from awx.main.tasks.signals import signal_state, signal_callback, SignalExit
from awx.main.models import Instance, InstanceLink, UnifiedJob
from awx.main.dispatch import get_local_queuename
from awx.main.dispatch.publish import task
# Receptorctl
from receptorctl.socket_interface import ReceptorControl
from filelock import FileLock
logger = logging.getLogger('awx.main.tasks.receptor')
__RECEPTOR_CONF = '/etc/receptor/receptor.conf'
__RECEPTOR_CONF_LOCKFILE = f'{__RECEPTOR_CONF}.lock'
RECEPTOR_ACTIVE_STATES = ('Pending', 'Running')
@@ -48,22 +40,9 @@ class ReceptorConnectionType(Enum):
STREAMTLS = 2
def read_receptor_config():
    """Load the receptor configuration file and return the parsed YAML data."""

    def _parse():
        with open(__RECEPTOR_CONF, 'r') as conf_file:
            return yaml.safe_load(conf_file)

    if not settings.IS_K8S:
        return _parse()

    # for K8S deployments, getting a lock is necessary as another process
    # may be re-writing the config at this time
    with FileLock(__RECEPTOR_CONF_LOCKFILE):
        return _parse()
def get_receptor_sockfile():
data = read_receptor_config()
with open(__RECEPTOR_CONF, 'r') as f:
data = yaml.safe_load(f)
for section in data:
for entry_name, entry_data in section.items():
if entry_name == 'control-service':
@@ -79,7 +58,8 @@ def get_tls_client(use_stream_tls=None):
if not use_stream_tls:
return None
data = read_receptor_config()
with open(__RECEPTOR_CONF, 'r') as f:
data = yaml.safe_load(f)
for section in data:
for entry_name, entry_data in section.items():
if entry_name == 'tls-client':
@@ -96,25 +76,12 @@ def get_receptor_ctl():
return ReceptorControl(receptor_sockfile)
def find_node_in_mesh(node_name, receptor_ctl):
    """Return the receptor advertisement whose ``NodeID`` equals *node_name*.

    The receptor ``status`` command is polled up to 10 times, sleeping with a
    linearly growing delay (1s, 2s, ...) between attempts.

    :param node_name: node id to look for in the advertisements.
    :param receptor_ctl: object exposing ``simple_command("status")`` that
        returns a dict.
    :returns: the matching advertisement entry.
    :raises ReceptorNodeNotFound: if no attempt finds the node.
    """
    attempts = 10
    for attempt in range(1, attempts + 1):
        # the 'Advertisements' key may be missing or null; treat that as "no nodes yet"
        advertisements = receptor_ctl.simple_command("status").get('Advertisements') or []
        for node in advertisements:
            if node.get('NodeID') == node_name:
                return node

        remaining = attempts - attempt
        logger.warning(f"Instance {node_name} is not in the receptor mesh. {remaining} attempts left.")
        if remaining:
            # no point in sleeping after the final attempt, we are about to give up
            time.sleep(attempt)

    raise ReceptorNodeNotFound(f'Instance {node_name} is not in the receptor mesh')
def get_conn_type(node_name, receptor_ctl):
node = find_node_in_mesh(node_name, receptor_ctl)
return ReceptorConnectionType(node.get('ConnType'))
all_nodes = receptor_ctl.simple_command("status").get('Advertisements', None)
for node in all_nodes:
if node.get('NodeID') == node_name:
return ReceptorConnectionType(node.get('ConnType'))
raise ReceptorNodeNotFound(f'Instance {node_name} is not in the receptor mesh')
def administrative_workunit_reaper(work_list=None):
@@ -132,22 +99,16 @@ def administrative_workunit_reaper(work_list=None):
for unit_id, work_data in work_list.items():
extra_data = work_data.get('ExtraData')
if extra_data is None:
if (extra_data is None) or (extra_data.get('RemoteWorkType') != 'ansible-runner'):
continue # if this is not ansible-runner work, we do not want to touch it
if isinstance(extra_data, str):
if not work_data.get('StateName', None) or work_data.get('StateName') in RECEPTOR_ACTIVE_STATES:
continue
else:
if extra_data.get('RemoteWorkType') != 'ansible-runner':
continue
params = extra_data.get('RemoteParams', {}).get('params')
if not params:
continue
if not (params == '--worker-info' or params.startswith('cleanup')):
continue # if this is not a cleanup or health check, we do not want to touch it
if work_data.get('StateName') in RECEPTOR_ACTIVE_STATES:
continue # do not want to touch active work units
logger.info(f'Reaping orphaned work unit {unit_id} with params {params}')
params = extra_data.get('RemoteParams', {}).get('params')
if not params:
continue
if not (params == '--worker-info' or params.startswith('cleanup')):
continue # if this is not a cleanup or health check, we do not want to touch it
if work_data.get('StateName') in RECEPTOR_ACTIVE_STATES:
continue # do not want to touch active work units
logger.info(f'Reaping orphaned work unit {unit_id} with params {params}')
receptor_ctl.simple_command(f"work release {unit_id}")
@@ -167,7 +128,8 @@ def run_until_complete(node, timing_data=None, **kwargs):
kwargs.setdefault('payload', '')
transmit_start = time.time()
result = receptor_ctl.submit_work(worktype='ansible-runner', node=node, signwork=True, **kwargs)
sign_work = False if settings.IS_K8S else True
result = receptor_ctl.submit_work(worktype='ansible-runner', node=node, signwork=sign_work, **kwargs)
unit_id = result['unitid']
run_start = time.time()
@@ -242,7 +204,7 @@ def worker_info(node_name, work_type='ansible-runner'):
else:
error_list.append(details)
except Exception as exc:
except (ReceptorNodeNotFound, RuntimeError) as exc:
error_list.append(str(exc))
# If we have a connection error, missing keys would be trivial consequence of that
@@ -313,6 +275,10 @@ class AWXReceptorJob:
except Exception:
logger.exception(f"Error releasing work unit {self.unit_id}.")
@property
def sign_work(self):
    """Whether submitted work should be signed: false when ``settings.IS_K8S`` is truthy."""
    return not settings.IS_K8S
def _run_internal(self, receptor_ctl):
# Create a socketpair. Where the left side will be used for writing our payload
# (private data dir, kwargs). The right side will be passed to Receptor for
@@ -363,32 +329,24 @@ class AWXReceptorJob:
shutil.rmtree(artifact_dir)
resultsock, resultfile = receptor_ctl.get_work_results(self.unit_id, return_socket=True, return_sockfile=True)
connections.close_all()
# "processor" and the main thread will be separate threads.
# If a cancel happens, the main thread will encounter an exception, in which case
# we yank the socket out from underneath the processor, which will cause it to exit.
# The ThreadPoolExecutor context manager ensures we do not leave any threads laying around.
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
# Both "processor" and "cancel_watcher" are spawned in separate threads.
# We wait for the first one to return. If cancel_watcher returns first,
# we yank the socket out from underneath the processor, which will cause it
# to exit. A reference to the processor_future is passed into the cancel_watcher_future,
# Which exits if the job has finished normally. The context manager ensures we do not
# leave any threads laying around.
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
processor_future = executor.submit(self.processor, resultfile)
cancel_watcher_future = executor.submit(self.cancel_watcher, processor_future)
futures = [processor_future, cancel_watcher_future]
first_future = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
try:
signal_state.raise_exception = True
# address race condition where SIGTERM was issued after this dispatcher task started
if signal_callback():
raise SignalExit()
res = processor_future.result()
except SignalExit:
res = list(first_future.done)[0].result()
if res.status == 'canceled':
receptor_ctl.simple_command(f"work cancel {self.unit_id}")
resultsock.shutdown(socket.SHUT_RDWR)
resultfile.close()
result = namedtuple('result', ['status', 'rc'])
res = result('canceled', 1)
finally:
signal_state.raise_exception = False
if res.status == 'error':
elif res.status == 'error':
# If ansible-runner ran, but an error occurred at runtime, the traceback information
# is saved via the status_handler passed in to the processor.
if 'result_traceback' in self.task.runner_callback.extra_update_fields:
@@ -472,10 +430,6 @@ class AWXReceptorJob:
return receptor_params
@property
def sign_work(self):
    """Whether submitted work should be signed: only for the 'ansible-runner' and 'local' work types."""
    signed_work_types = ('ansible-runner', 'local')
    return self.work_type in signed_work_types
@property
def work_type(self):
if self.task.instance.is_container_group_task:
@@ -486,6 +440,18 @@ class AWXReceptorJob:
return 'local'
return 'ansible-runner'
@cleanup_new_process
def cancel_watcher(self, processor_future):
    """Poll once per second until the processor finishes or a cancel is requested.

    :param processor_future: future of the result processor.
    :returns: the processor's own result once it is done; otherwise a
        ``result(status='canceled', rc=1)`` namedtuple as soon as the runner
        callback's ``cancel_callback()`` returns a truthy value.
    """
    cancel_requested = self.task.runner_callback.cancel_callback
    while not processor_future.done():
        if cancel_requested():
            canceled = namedtuple('result', ['status', 'rc'])
            return canceled('canceled', 1)
        time.sleep(1)
    return processor_future.result()
@property
def pod_definition(self):
ee = self.task.instance.execution_environment
@@ -604,105 +570,3 @@ class AWXReceptorJob:
else:
config["clusters"][0]["cluster"]["insecure-skip-tls-verify"] = True
return config
# TODO: receptor reload expects ordering within config items to be preserved
# if python dictionary is not preserving order properly, may need to find a
# solution. yaml.dump does not seem to work well with OrderedDict. below line may help
# yaml.add_representer(OrderedDict, lambda dumper, data: dumper.represent_mapping('tag:yaml.org,2002:map', data.items()))
#
RECEPTOR_CONFIG_STARTER = (
{'local-only': None},
{'log-level': 'debug'},
{'node': {'firewallrules': [{'action': 'reject', 'tonode': settings.CLUSTER_HOST_ID, 'toservice': 'control'}]}},
{'control-service': {'service': 'control', 'filename': '/var/run/receptor/receptor.sock', 'permissions': '0660'}},
{'work-command': {'worktype': 'local', 'command': 'ansible-runner', 'params': 'worker', 'allowruntimeparams': True}},
{'work-signing': {'privatekey': '/etc/receptor/signing/work-private-key.pem', 'tokenexpiration': '1m'}},
{
'work-kubernetes': {
'worktype': 'kubernetes-runtime-auth',
'authmethod': 'runtime',
'allowruntimeauth': True,
'allowruntimepod': True,
'allowruntimeparams': True,
}
},
{
'work-kubernetes': {
'worktype': 'kubernetes-incluster-auth',
'authmethod': 'incluster',
'allowruntimeauth': True,
'allowruntimepod': True,
'allowruntimeparams': True,
}
},
{
'tls-client': {
'name': 'tlsclient',
'rootcas': '/etc/receptor/tls/ca/receptor-ca.crt',
'cert': '/etc/receptor/tls/receptor.crt',
'key': '/etc/receptor/tls/receptor.key',
}
},
)
@task()
def write_receptor_config():
    """Rewrite the receptor config with a tcp-peer per execution node and reload receptor.

    Under the config file lock: starts from RECEPTOR_CONFIG_STARTER, appends a
    ``tcp-peer`` entry for every execution-type Instance, creates InstanceLink
    rows (state ADDING) for peers this instance is not yet linked to, and dumps
    the result to the config file. Afterwards receptor is asked to reload, and
    the ADDING links from this instance are marked ESTABLISHED.

    :raises RuntimeError: if the reload command keeps raising ValueError for
        all 10 attempts.
    """
    lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
    with lock:
        # copy, so the module-level starter tuple is never mutated
        receptor_config = list(RECEPTOR_CONFIG_STARTER)

        this_inst = Instance.objects.me()
        instances = Instance.objects.filter(node_type=Instance.Types.EXECUTION)
        # targets this instance already has a link to; only the missing ones get new rows
        existing_peers = {link.target_id for link in InstanceLink.objects.filter(source=this_inst)}
        new_links = []
        for instance in instances:
            peer = {'tcp-peer': {'address': f'{instance.hostname}:{instance.listener_port}', 'tls': 'tlsclient'}}
            receptor_config.append(peer)
            if instance.id not in existing_peers:
                new_links.append(InstanceLink(source=this_inst, target=instance, link_state=InstanceLink.States.ADDING))

        InstanceLink.objects.bulk_create(new_links)

        with open(__RECEPTOR_CONF, 'w') as file:
            yaml.dump(receptor_config, file, default_flow_style=False)

    # This needs to be outside of the lock because this function itself will acquire the lock.
    receptor_ctl = get_receptor_ctl()

    attempts = 10
    # the loop variable doubles as the sleep duration: 1s, 2s, ... between retries
    for backoff in range(1, attempts + 1):
        try:
            receptor_ctl.simple_command("reload")
            break
        except ValueError:
            logger.warning(f"Unable to reload Receptor configuration. {attempts-backoff} attempts left.")
            time.sleep(backoff)
    else:
        # for/else: reached only when no attempt hit the break above
        raise RuntimeError("Receptor reload failed")

    # reload succeeded, so links still marked ADDING are promoted
    links = InstanceLink.objects.filter(source=this_inst, target__in=instances, link_state=InstanceLink.States.ADDING)
    links.update(link_state=InstanceLink.States.ESTABLISHED)
@task(queue=get_local_queuename)
def remove_deprovisioned_node(hostname):
    """Drain and delete the instance named *hostname*, then refresh receptor configs.

    Links from and to the node are flagged REMOVING, the task then waits (polling
    every 60 seconds) until the node has no running or waiting jobs, deletes the
    Instance row and broadcasts ``write_receptor_config``.

    :param hostname: hostname of the instance being deprovisioned.
    """
    # outbound links first, then inbound ones
    for side in ('source', 'target'):
        InstanceLink.objects.filter(**{f'{side}__hostname': hostname}).update(link_state=InstanceLink.States.REMOVING)

    active_statuses = ('running', 'waiting')
    node_jobs = UnifiedJob.objects.filter(execution_node=hostname, status__in=active_statuses)
    while node_jobs.exists():
        time.sleep(60)

    # This will as a side effect also delete the InstanceLinks that are tied to it.
    Instance.objects.filter(hostname=hostname).delete()

    # Update the receptor configs for all of the control-plane.
    write_receptor_config.apply_async(queue='tower_broadcast_all')

View File

@@ -9,17 +9,12 @@ logger = logging.getLogger('awx.main.tasks.signals')
__all__ = ['with_signal_handling', 'signal_callback']
class SignalExit(Exception):
    """Raised by ``SignalState.set_flag`` when a signal arrives while ``raise_exception`` is set."""

    pass
class SignalState:
def reset(self):
self.sigterm_flag = False
self.is_active = False
self.original_sigterm = None
self.original_sigint = None
self.raise_exception = False
def __init__(self):
self.reset()
@@ -27,9 +22,6 @@ class SignalState:
def set_flag(self, *args):
"""Method to pass into the python signal.signal method to receive signals"""
self.sigterm_flag = True
if self.raise_exception:
self.raise_exception = False # so it is not raised a second time in error handling
raise SignalExit()
def connect_signals(self):
self.original_sigterm = signal.getsignal(signal.SIGTERM)

View File

@@ -10,13 +10,12 @@ from contextlib import redirect_stdout
import shutil
import time
from distutils.version import LooseVersion as Version
from datetime import datetime
# Django
from django.conf import settings
from django.db import transaction, DatabaseError, IntegrityError
from django.db.models.fields.related import ForeignKey
from django.utils.timezone import now, timedelta
from django.utils.timezone import now
from django.utils.encoding import smart_str
from django.contrib.auth.models import User
from django.utils.translation import gettext_lazy as _
@@ -54,14 +53,13 @@ from awx.main.dispatch import get_local_queuename, reaper
from awx.main.utils.common import (
ignore_inventory_computed_fields,
ignore_inventory_group_removal,
ScheduleWorkflowManager,
ScheduleTaskManager,
schedule_task_manager,
)
from awx.main.utils.external_logging import reconfigure_rsyslog
from awx.main.utils.reload import stop_local_services
from awx.main.utils.pglock import advisory_lock
from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper, write_receptor_config
from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper
from awx.main.consumers import emit_channel_notification
from awx.main import analytics
from awx.conf import settings_registry
@@ -81,10 +79,6 @@ Try upgrading OpenSSH or providing your private key in an different format. \
def dispatch_startup():
startup_logger = logging.getLogger('awx.main.tasks')
# TODO: Enable this on VM installs
if settings.IS_K8S:
write_receptor_config()
startup_logger.debug("Syncing Schedules")
for sch in Schedule.objects.all():
try:
@@ -109,8 +103,6 @@ def dispatch_startup():
#
apply_cluster_membership_policies()
cluster_node_heartbeat()
reaper.startup_reaping()
reaper.reap_waiting(grace_period=0)
m = Metrics()
m.reset_values()
@@ -122,10 +114,6 @@ def inform_cluster_of_shutdown():
try:
this_inst = Instance.objects.get(hostname=settings.CLUSTER_HOST_ID)
this_inst.mark_offline(update_last_seen=True, errors=_('Instance received normal shutdown signal'))
try:
reaper.reap_waiting(this_inst, grace_period=0)
except Exception:
logger.exception('failed to reap waiting jobs for {}'.format(this_inst.hostname))
logger.warning('Normal shutdown signal for instance {}, removed self from capacity pool.'.format(this_inst.hostname))
except Exception:
logger.exception('Encountered problem with normal shutdown signal.')
@@ -413,12 +401,10 @@ def execution_node_health_check(node):
return
if instance.node_type != 'execution':
logger.warning(f'Execution node health check ran against {instance.node_type} node {instance.hostname}')
return
raise RuntimeError(f'Execution node health check ran against {instance.node_type} node {instance.hostname}')
if instance.node_state not in (Instance.States.READY, Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
logger.warning(f"Execution node health check ran against node {instance.hostname} in state {instance.node_state}")
return
raise RuntimeError(f"Execution node health check ran against node {instance.hostname} in state {instance.node_state}")
data = worker_info(node)
@@ -464,7 +450,7 @@ def inspect_execution_nodes(instance_list):
continue
# Control-plane nodes are dealt with via local_health_check instead.
if instance.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID):
if instance.node_type in ('control', 'hybrid'):
continue
last_seen = parse_date(ad['Time'])
@@ -474,7 +460,7 @@ def inspect_execution_nodes(instance_list):
instance.save(update_fields=['last_seen'])
# Only execution nodes should be dealt with by execution_node_health_check
if instance.node_type == Instance.Types.HOP:
if instance.node_type == 'hop':
if instance.node_state in (Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
logger.warning(f'Hop node {hostname}, has rejoined the receptor mesh')
instance.save_health_data(errors='')
@@ -495,8 +481,8 @@ def inspect_execution_nodes(instance_list):
execution_node_health_check.apply_async([hostname])
@task(queue=get_local_queuename, bind_kwargs=['dispatch_time', 'worker_tasks'])
def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
@task(queue=get_local_queuename)
def cluster_node_heartbeat():
logger.debug("Cluster node heartbeat task.")
nowtime = now()
instance_list = list(Instance.objects.filter(node_state__in=(Instance.States.READY, Instance.States.UNAVAILABLE, Instance.States.INSTALLED)))
@@ -519,23 +505,12 @@ def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
if this_inst:
startup_event = this_inst.is_lost(ref_time=nowtime)
last_last_seen = this_inst.last_seen
this_inst.local_health_check()
if startup_event and this_inst.capacity != 0:
logger.warning(f'Rejoining the cluster as instance {this_inst.hostname}. Prior last_seen {last_last_seen}')
logger.warning('Rejoining the cluster as instance {}.'.format(this_inst.hostname))
return
elif not last_last_seen:
logger.warning(f'Instance does not have recorded last_seen, updating to {nowtime}')
elif (nowtime - last_last_seen) > timedelta(seconds=settings.CLUSTER_NODE_HEARTBEAT_PERIOD + 2):
logger.warning(f'Heartbeat skew - interval={(nowtime - last_last_seen).total_seconds():.4f}, expected={settings.CLUSTER_NODE_HEARTBEAT_PERIOD}')
else:
if settings.AWX_AUTO_DEPROVISION_INSTANCES:
(changed, this_inst) = Instance.objects.register(ip_address=os.environ.get('MY_POD_IP'), node_type='control', uuid=settings.SYSTEM_UUID)
if changed:
logger.warning(f'Recreated instance record {this_inst.hostname} after unexpected removal')
this_inst.local_health_check()
else:
raise RuntimeError("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID))
raise RuntimeError("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID))
# IFF any node has a greater version than we do, then we'll shutdown services
for other_inst in instance_list:
if other_inst.node_type in ('execution', 'hop'):
@@ -555,13 +530,11 @@ def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
for other_inst in lost_instances:
try:
explanation = "Job reaped due to instance shutdown"
reaper.reap(other_inst, job_explanation=explanation)
reaper.reap_waiting(other_inst, grace_period=0, job_explanation=explanation)
reaper.reap(other_inst)
except Exception:
logger.exception('failed to reap jobs for {}'.format(other_inst.hostname))
try:
if settings.AWX_AUTO_DEPROVISION_INSTANCES and other_inst.node_type == "control":
if settings.AWX_AUTO_DEPROVISION_INSTANCES:
deprovision_hostname = other_inst.hostname
other_inst.delete() # FIXME: what about associated inbound links?
logger.info("Host {} Automatically Deprovisioned.".format(deprovision_hostname))
@@ -575,15 +548,6 @@ def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
else:
logger.exception('Error marking {} as lost'.format(other_inst.hostname))
# Run local reaper
if worker_tasks is not None:
active_task_ids = []
for task_list in worker_tasks.values():
active_task_ids.extend(task_list)
reaper.reap(instance=this_inst, excluded_uuids=active_task_ids)
if max(len(task_list) for task_list in worker_tasks.values()) <= 1:
reaper.reap_waiting(instance=this_inst, excluded_uuids=active_task_ids, ref_time=datetime.fromisoformat(dispatch_time))
@task(queue=get_local_queuename)
def awx_receptor_workunit_reaper():
@@ -631,8 +595,7 @@ def awx_k8s_reaper():
for group in InstanceGroup.objects.filter(is_container_group=True).iterator():
logger.debug("Checking for orphaned k8s pods for {}.".format(group))
pods = PodManager.list_active_jobs(group)
time_cutoff = now() - timedelta(seconds=settings.K8S_POD_REAPER_GRACE_PERIOD)
for job in UnifiedJob.objects.filter(pk__in=pods.keys(), finished__lte=time_cutoff).exclude(status__in=ACTIVE_STATES):
for job in UnifiedJob.objects.filter(pk__in=pods.keys()).exclude(status__in=ACTIVE_STATES):
logger.debug('{} is no longer active, reaping orphaned k8s pod'.format(job.log_format))
try:
pm = PodManager(job)
@@ -700,13 +663,6 @@ def awx_periodic_scheduler():
state.save()
def schedule_manager_success_or_error(instance):
    """Schedule only the managers that the finished job can affect.

    :param instance: finished job; the task manager is scheduled when it has
        blocked jobs, the workflow manager when it was spawned by a workflow.
    """
    has_blocked_jobs = instance.unifiedjob_blocked_jobs.exists()
    if has_blocked_jobs:
        task_manager = ScheduleTaskManager()
        task_manager.schedule()

    if not instance.spawned_by_workflow:
        return
    workflow_manager = ScheduleWorkflowManager()
    workflow_manager.schedule()
@task(queue=get_local_queuename)
def handle_work_success(task_actual):
try:
@@ -716,7 +672,8 @@ def handle_work_success(task_actual):
return
if not instance:
return
schedule_manager_success_or_error(instance)
schedule_task_manager()
@task(queue=get_local_queuename)
@@ -758,7 +715,8 @@ def handle_work_error(task_id, *args, **kwargs):
# what the job complete message handler does then we may want to send a
# completion event for each job here.
if first_instance:
schedule_manager_success_or_error(first_instance)
schedule_task_manager()
pass
@task(queue=get_local_queuename)

View File

@@ -210,7 +210,7 @@ def mk_workflow_job_template(name, extra_vars='', spec=None, organization=None,
if extra_vars:
extra_vars = json.dumps(extra_vars)
wfjt = WorkflowJobTemplate.objects.create(name=name, extra_vars=extra_vars, organization=organization, webhook_service=webhook_service)
wfjt = WorkflowJobTemplate(name=name, extra_vars=extra_vars, organization=organization, webhook_service=webhook_service)
if spec:
wfjt.survey_spec = spec

View File

@@ -19,7 +19,8 @@ EXPECTED_VALUES = {
'awx_hosts_total': 1.0,
'awx_schedules_total': 1.0,
'awx_sessions_total': 0.0,
'awx_status_total': 0.0,
'awx_sessions_total': 0.0,
'awx_sessions_total': 0.0,
'awx_running_jobs_total': 0.0,
'awx_instance_capacity': 100.0,
'awx_instance_consumed_capacity': 0.0,

View File

@@ -1,9 +1,16 @@
import pytest
from unittest import mock
from awx.api.versioning import reverse
from awx.main.models.activity_stream import ActivityStream
from awx.main.models.ha import Instance
import redis
# Django
from django.test.utils import override_settings
INSTANCE_KWARGS = dict(hostname='example-host', cpu=6, memory=36000000000, cpu_capacity=6, mem_capacity=42)
@@ -43,14 +50,33 @@ def test_enabled_sets_capacity(patch, admin_user):
def test_auditor_user_health_check(get, post, system_auditor):
instance = Instance.objects.create(**INSTANCE_KWARGS)
url = reverse('api:instance_health_check', kwargs={'pk': instance.pk})
get(url=url, user=system_auditor, expect=200)
r = get(url=url, user=system_auditor, expect=200)
assert r.data['cpu_capacity'] == instance.cpu_capacity
post(url=url, user=system_auditor, expect=403)
@pytest.mark.django_db
def test_health_check_throws_error(post, admin_user):
instance = Instance.objects.create(node_type='execution', **INSTANCE_KWARGS)
url = reverse('api:instance_health_check', kwargs={'pk': instance.pk})
# we will simulate a receptor error, similar to this one
# https://github.com/ansible/receptor/blob/156e6e24a49fbf868734507f9943ac96208ed8f5/receptorctl/receptorctl/socket_interface.py#L204
# related to issue https://github.com/ansible/tower/issues/5315
with mock.patch('awx.main.tasks.receptor.run_until_complete', side_effect=RuntimeError('Remote error: foobar')):
post(url=url, user=admin_user, expect=200)
instance.refresh_from_db()
assert 'Remote error: foobar' in instance.errors
assert instance.capacity == 0
@pytest.mark.django_db
@mock.patch.object(redis.client.Redis, 'ping', lambda self: True)
def test_health_check_usage(get, post, admin_user):
instance = Instance.objects.create(**INSTANCE_KWARGS)
url = reverse('api:instance_health_check', kwargs={'pk': instance.pk})
get(url=url, user=admin_user, expect=200)
r = post(url=url, user=admin_user, expect=200)
assert r.data['msg'] == f"Health check is running for {instance.hostname}."
r = get(url=url, user=admin_user, expect=200)
assert r.data['cpu_capacity'] == instance.cpu_capacity
assert r.data['last_health_check'] is None
with override_settings(CLUSTER_HOST_ID=instance.hostname): # force direct call of cluster_node_health_check
r = post(url=url, user=admin_user, expect=200)
assert r.data['last_health_check'] is not None

View File

@@ -13,11 +13,17 @@ from django.utils import timezone
# AWX
from awx.api.versioning import reverse
from awx.api.views import RelatedJobsPreventDeleteMixin, UnifiedJobDeletionMixin
from awx.main.models import JobTemplate, User, Job, AdHocCommand, ProjectUpdate, InstanceGroup, Label, Organization
from awx.main.models import (
JobTemplate,
User,
Job,
AdHocCommand,
ProjectUpdate,
)
@pytest.mark.django_db
def test_job_relaunch_permission_denied_response(post, get, inventory, project, net_credential, machine_credential):
def test_job_relaunch_permission_denied_response(post, get, inventory, project, credential, net_credential, machine_credential):
jt = JobTemplate.objects.create(name='testjt', inventory=inventory, project=project, ask_credential_on_launch=True)
jt.credentials.add(machine_credential)
jt_user = User.objects.create(username='jobtemplateuser')
@@ -33,22 +39,6 @@ def test_job_relaunch_permission_denied_response(post, get, inventory, project,
job.launch_config.credentials.add(net_credential)
r = post(reverse('api:job_relaunch', kwargs={'pk': job.pk}), {}, jt_user, expect=403)
assert 'launched with prompted fields you do not have access to' in r.data['detail']
job.launch_config.credentials.clear()
# Job has prompted instance group that user cannot see
job.launch_config.instance_groups.add(InstanceGroup.objects.create())
r = post(reverse('api:job_relaunch', kwargs={'pk': job.pk}), {}, jt_user, expect=403)
assert 'launched with prompted fields you do not have access to' in r.data['detail']
job.launch_config.instance_groups.clear()
# Job has prompted label that user cannot see
job.launch_config.labels.add(Label.objects.create(organization=Organization.objects.create()))
r = post(reverse('api:job_relaunch', kwargs={'pk': job.pk}), {}, jt_user, expect=403)
assert 'launched with prompted fields you do not have access to' in r.data['detail']
job.launch_config.labels.clear()
# without any of those prompts, user can launch
r = post(reverse('api:job_relaunch', kwargs={'pk': job.pk}), {}, jt_user, expect=201)
@pytest.mark.django_db

View File

@@ -4,7 +4,8 @@ import yaml
import json
from awx.api.serializers import JobLaunchSerializer
from awx.main.models import Credential, Inventory, Host, ExecutionEnvironment, Label, InstanceGroup
from awx.main.models.credential import Credential
from awx.main.models.inventory import Inventory, Host
from awx.main.models.jobs import Job, JobTemplate, UnifiedJobTemplate
from awx.api.versioning import reverse
@@ -14,11 +15,6 @@ from awx.api.versioning import reverse
def runtime_data(organization, credentialtype_ssh):
cred_obj = Credential.objects.create(name='runtime-cred', credential_type=credentialtype_ssh, inputs={'username': 'test_user2', 'password': 'pas4word2'})
inv_obj = organization.inventories.create(name="runtime-inv")
inv_obj.hosts.create(name='foo1')
inv_obj.hosts.create(name='foo2')
ee_obj = ExecutionEnvironment.objects.create(name='test-ee', image='quay.io/foo/bar')
ig_obj = InstanceGroup.objects.create(name='bar', policy_instance_percentage=100, policy_instance_minimum=2)
labels_obj = Label.objects.create(name='foo', description='bar', organization=organization)
return dict(
extra_vars='{"job_launch_var": 4}',
limit='test-servers',
@@ -29,12 +25,6 @@ def runtime_data(organization, credentialtype_ssh):
credentials=[cred_obj.pk],
diff_mode=True,
verbosity=2,
execution_environment=ee_obj.pk,
labels=[labels_obj.pk],
forks=7,
job_slice_count=2,
timeout=10,
instance_groups=[ig_obj.pk],
)
@@ -64,12 +54,6 @@ def job_template_prompts(project, inventory, machine_credential):
ask_credential_on_launch=on_off,
ask_diff_mode_on_launch=on_off,
ask_verbosity_on_launch=on_off,
ask_execution_environment_on_launch=on_off,
ask_labels_on_launch=on_off,
ask_forks_on_launch=on_off,
ask_job_slice_count_on_launch=on_off,
ask_timeout_on_launch=on_off,
ask_instance_groups_on_launch=on_off,
)
jt.credentials.add(machine_credential)
return jt
@@ -93,12 +77,6 @@ def job_template_prompts_null(project):
ask_credential_on_launch=True,
ask_diff_mode_on_launch=True,
ask_verbosity_on_launch=True,
ask_execution_environment_on_launch=True,
ask_labels_on_launch=True,
ask_forks_on_launch=True,
ask_job_slice_count_on_launch=True,
ask_timeout_on_launch=True,
ask_instance_groups_on_launch=True,
)
@@ -114,12 +92,6 @@ def data_to_internal(data):
internal['credentials'] = set(Credential.objects.get(pk=_id) for _id in data['credentials'])
if 'inventory' in data:
internal['inventory'] = Inventory.objects.get(pk=data['inventory'])
if 'execution_environment' in data:
internal['execution_environment'] = ExecutionEnvironment.objects.get(pk=data['execution_environment'])
if 'labels' in data:
internal['labels'] = [Label.objects.get(pk=_id) for _id in data['labels']]
if 'instance_groups' in data:
internal['instance_groups'] = [InstanceGroup.objects.get(pk=_id) for _id in data['instance_groups']]
return internal
@@ -152,12 +124,6 @@ def test_job_ignore_unprompted_vars(runtime_data, job_template_prompts, post, ad
assert 'credentials' in response.data['ignored_fields']
assert 'job_tags' in response.data['ignored_fields']
assert 'skip_tags' in response.data['ignored_fields']
assert 'execution_environment' in response.data['ignored_fields']
assert 'labels' in response.data['ignored_fields']
assert 'forks' in response.data['ignored_fields']
assert 'job_slice_count' in response.data['ignored_fields']
assert 'timeout' in response.data['ignored_fields']
assert 'instance_groups' in response.data['ignored_fields']
@pytest.mark.django_db
@@ -196,34 +162,6 @@ def test_job_accept_empty_tags(job_template_prompts, post, admin_user, mocker):
mock_job.signal_start.assert_called_once()
@pytest.mark.django_db
@pytest.mark.job_runtime_vars
def test_slice_timeout_forks_need_int(job_template_prompts, post, admin_user, mocker):
job_template = job_template_prompts(True)
mock_job = mocker.MagicMock(spec=Job, id=968)
with mocker.patch.object(JobTemplate, 'create_unified_job', return_value=mock_job):
with mocker.patch('awx.api.serializers.JobSerializer.to_representation'):
response = post(
reverse('api:job_template_launch', kwargs={'pk': job_template.pk}), {'timeout': '', 'job_slice_count': '', 'forks': ''}, admin_user, expect=400
)
assert 'forks' in response.data and response.data['forks'][0] == 'A valid integer is required.'
assert 'job_slice_count' in response.data and response.data['job_slice_count'][0] == 'A valid integer is required.'
assert 'timeout' in response.data and response.data['timeout'][0] == 'A valid integer is required.'
@pytest.mark.django_db
@pytest.mark.job_runtime_vars
def test_slice_count_not_supported(job_template_prompts, post, admin_user):
job_template = job_template_prompts(True)
assert job_template.inventory.hosts.count() == 0
job_template.inventory.hosts.create(name='foo')
response = post(reverse('api:job_template_launch', kwargs={'pk': job_template.pk}), {'job_slice_count': 8}, admin_user, expect=400)
assert response.data['job_slice_count'][0] == 'Job inventory does not have enough hosts for slicing'
@pytest.mark.django_db
@pytest.mark.job_runtime_vars
def test_job_accept_prompted_vars_null(runtime_data, job_template_prompts_null, post, rando, mocker):
@@ -238,10 +176,6 @@ def test_job_accept_prompted_vars_null(runtime_data, job_template_prompts_null,
inventory = Inventory.objects.get(pk=runtime_data['inventory'])
inventory.use_role.members.add(rando)
# Instance Groups and label can not currently easily be used by rando so we need to remove the instance groups from the runtime data
runtime_data.pop('instance_groups')
runtime_data.pop('labels')
mock_job = mocker.MagicMock(spec=Job, id=968, **runtime_data)
with mocker.patch.object(JobTemplate, 'create_unified_job', return_value=mock_job):
@@ -309,59 +243,12 @@ def test_job_launch_fails_without_inventory_access(job_template_prompts, runtime
@pytest.mark.django_db
@pytest.mark.job_runtime_vars
def test_job_launch_works_without_access_to_ig_if_ig_in_template(job_template_prompts, runtime_data, post, rando, mocker):
job_template = job_template_prompts(True)
job_template.instance_groups.add(InstanceGroup.objects.get(id=runtime_data['instance_groups'][0]))
job_template.instance_groups.add(InstanceGroup.objects.create(name='foo'))
job_template.save()
job_template.execute_role.members.add(rando)
# Make sure we get a 201 instead of a 403 since we are providing an override of just a subset of the instance gorup that was already added
post(reverse('api:job_template_launch', kwargs={'pk': job_template.pk}), dict(instance_groups=runtime_data['instance_groups']), rando, expect=201)
@pytest.mark.django_db
@pytest.mark.job_runtime_vars
def test_job_launch_works_without_access_to_label_if_label_in_template(job_template_prompts, runtime_data, post, rando, mocker, organization):
job_template = job_template_prompts(True)
job_template.labels.add(Label.objects.get(id=runtime_data['labels'][0]))
job_template.labels.add(Label.objects.create(name='baz', description='faz', organization=organization))
job_template.save()
job_template.execute_role.members.add(rando)
# Make sure we get a 201 instead of a 403 since we are providing an override of just a subset of the instance gorup that was already added
post(reverse('api:job_template_launch', kwargs={'pk': job_template.pk}), dict(labels=runtime_data['labels']), rando, expect=201)
@pytest.mark.django_db
@pytest.mark.job_runtime_vars
def test_job_launch_works_without_access_to_ee_if_ee_in_template(job_template_prompts, runtime_data, post, rando, mocker, organization):
job_template = job_template_prompts(True)
job_template.execute_role.members.add(rando)
# Make sure we get a 201 instead of a 403 since we are providing an override that is already in the template
post(
reverse('api:job_template_launch', kwargs={'pk': job_template.pk}), dict(execution_environment=runtime_data['execution_environment']), rando, expect=201
)
@pytest.mark.parametrize(
'item_type',
[
('credentials'),
('labels'),
('instance_groups'),
],
)
@pytest.mark.django_db
@pytest.mark.job_runtime_vars
def test_job_launch_fails_without_access(job_template_prompts, runtime_data, post, rando, item_type):
def test_job_launch_fails_without_credential_access(job_template_prompts, runtime_data, post, rando):
job_template = job_template_prompts(True)
job_template.execute_role.members.add(rando)
# Assure that giving a credential without access blocks the launch
data = {item_type: runtime_data[item_type]}
post(reverse('api:job_template_launch', kwargs={'pk': job_template.pk}), data, rando, expect=403)
post(reverse('api:job_template_launch', kwargs={'pk': job_template.pk}), dict(credentials=runtime_data['credentials']), rando, expect=403)
@pytest.mark.django_db

View File

@@ -13,10 +13,7 @@ from awx.main.models.workflow import (
WorkflowJobTemplateNode,
)
from awx.main.models.credential import Credential
from awx.main.scheduler import TaskManager, WorkflowManager, DependencyManager
# Django
from django.utils.timezone import now, timedelta
from awx.main.scheduler import TaskManager
@pytest.fixture
@@ -77,18 +74,6 @@ class TestApprovalNodes:
assert approval_node.unified_job_template.description == 'Approval Node'
assert approval_node.unified_job_template.timeout == 0
def test_approval_node_creation_with_timeout(self, post, approval_node, admin_user):
assert approval_node.timeout is None
url = reverse('api:workflow_job_template_node_create_approval', kwargs={'pk': approval_node.pk, 'version': 'v2'})
post(url, {'name': 'Test', 'description': 'Approval Node', 'timeout': 10}, user=admin_user, expect=201)
approval_node = WorkflowJobTemplateNode.objects.get(pk=approval_node.pk)
approval_node.refresh_from_db()
assert approval_node.timeout is None
assert isinstance(approval_node.unified_job_template, WorkflowApprovalTemplate)
assert approval_node.unified_job_template.timeout == 10
def test_approval_node_creation_failure(self, post, approval_node, admin_user):
# This test leaves off a required param to assert that user will get a 400.
url = reverse('api:workflow_job_template_node_create_approval', kwargs={'pk': approval_node.pk, 'version': 'v2'})
@@ -152,9 +137,8 @@ class TestApprovalNodes:
post(url, {'name': 'Approve Test', 'description': '', 'timeout': 0}, user=admin_user, expect=201)
post(reverse('api:workflow_job_template_launch', kwargs={'pk': wfjt.pk}), user=admin_user, expect=201)
wf_job = WorkflowJob.objects.first()
DependencyManager().schedule() # TODO: exclude workflows from this and delete line
TaskManager().schedule()
WorkflowManager().schedule()
TaskManager().schedule()
wfj_node = wf_job.workflow_nodes.first()
approval = wfj_node.job
assert approval.name == 'Approve Test'
@@ -178,9 +162,8 @@ class TestApprovalNodes:
post(url, {'name': 'Deny Test', 'description': '', 'timeout': 0}, user=admin_user, expect=201)
post(reverse('api:workflow_job_template_launch', kwargs={'pk': wfjt.pk}), user=admin_user, expect=201)
wf_job = WorkflowJob.objects.first()
DependencyManager().schedule() # TODO: exclude workflows from this and delete line
TaskManager().schedule()
WorkflowManager().schedule()
TaskManager().schedule()
wfj_node = wf_job.workflow_nodes.first()
approval = wfj_node.job
assert approval.name == 'Deny Test'
@@ -233,37 +216,6 @@ class TestApprovalNodes:
approval.refresh_from_db()
assert approval.status == 'failed'
def test_expires_time_on_creation(self):
now_time = now()
wa = WorkflowApproval.objects.create(timeout=34)
# this is fudged, so we assert that the expires time is in reasonable range
assert timedelta(seconds=33) < (wa.expires - now_time) < timedelta(seconds=35)
@pytest.mark.parametrize('with_update_fields', [True, False])
def test_expires_time_update(self, with_update_fields):
wa = WorkflowApproval.objects.create()
assert wa.timeout == 0
assert wa.expires is None
wa.timeout = 1234
if with_update_fields:
wa.save(update_fields=['timeout'])
else:
wa.save()
assert wa.created + timedelta(seconds=1234) == wa.expires
@pytest.mark.parametrize('with_update_fields', [True, False])
def test_reset_timeout_and_expires(self, with_update_fields):
wa = WorkflowApproval.objects.create()
wa.timeout = 1234
wa.save()
assert wa.expires
wa.timeout = 0
if with_update_fields:
wa.save(update_fields=['timeout'])
else:
wa.save()
assert wa.expires is None
@pytest.mark.django_db
class TestExclusiveRelationshipEnforcement:

View File

@@ -706,7 +706,7 @@ def jt_linked(organization, project, inventory, machine_credential, credential,
@pytest.fixture
def workflow_job_template(organization):
wjt = WorkflowJobTemplate.objects.create(name='test-workflow_job_template', organization=organization)
wjt = WorkflowJobTemplate(name='test-workflow_job_template', organization=organization)
wjt.save()
return wjt

View File

@@ -1,40 +0,0 @@
from unittest import mock
import pytest
from crum import impersonate
from awx.main.models import Host
@pytest.mark.django_db
def test_modified_by_not_changed(inventory):
with impersonate(None):
host = Host.objects.create(name='foo', inventory=inventory)
assert host.modified_by == None
host.variables = {'foo': 'bar'}
with mock.patch('django.db.models.Model.save') as save_mock:
host.save(update_fields=['variables'])
save_mock.assert_called_once_with(update_fields=['variables'])
@pytest.mark.django_db
def test_modified_by_changed(inventory, alice):
with impersonate(None):
host = Host.objects.create(name='foo', inventory=inventory)
assert host.modified_by == None
with impersonate(alice):
host.variables = {'foo': 'bar'}
with mock.patch('django.db.models.Model.save') as save_mock:
host.save(update_fields=['variables'])
save_mock.assert_called_once_with(update_fields=['variables', 'modified_by'])
assert host.modified_by == alice
@pytest.mark.django_db
def test_created_by(inventory, alice):
with impersonate(alice):
host = Host.objects.create(name='foo', inventory=inventory)
assert host.created_by == alice
with impersonate(None):
host = Host.objects.create(name='bar', inventory=inventory)
assert host.created_by == None

View File

@@ -64,26 +64,3 @@ class TestSlicingModels:
inventory2 = Inventory.objects.create(organization=organization, name='fooinv')
[inventory2.hosts.create(name='foo{}'.format(i)) for i in range(3)]
assert job_template.get_effective_slice_ct({'inventory': inventory2})
def test_effective_slice_count_prompt(self, job_template, inventory, organization):
job_template.inventory = inventory
# Add our prompt fields to the JT to allow overrides
job_template.ask_job_slice_count_on_launch = True
job_template.ask_inventory_on_launch = True
# Set a default value of the slice count to something low
job_template.job_slice_count = 2
# Create an inventory with 4 nodes
inventory2 = Inventory.objects.create(organization=organization, name='fooinv')
[inventory2.hosts.create(name='foo{}'.format(i)) for i in range(4)]
# The inventory slice count will be the min of the number of nodes (4) or the job slice (2)
assert job_template.get_effective_slice_ct({'inventory': inventory2}) == 2
# Now we are going to pass in an override (like the prompt would) and as long as that is < host count we expect that back
assert job_template.get_effective_slice_ct({'inventory': inventory2, 'job_slice_count': 3}) == 3
def test_slice_count_prompt_limited_by_inventory(self, job_template, inventory, organization):
assert inventory.hosts.count() == 0
job_template.inventory = inventory
inventory.hosts.create(name='foo')
unified_job = job_template.create_unified_job(job_slice_count=2)
assert isinstance(unified_job, Job)

View File

@@ -1,8 +1,7 @@
import pytest
# AWX
from awx.main.models.jobs import JobTemplate, LaunchTimeConfigBase
from awx.main.models.execution_environments import ExecutionEnvironment
from awx.main.models import JobTemplate, JobLaunchConfig
@pytest.fixture
@@ -12,6 +11,18 @@ def full_jt(inventory, project, machine_credential):
return jt
@pytest.fixture
def config_factory(full_jt):
def return_config(data):
job = full_jt.create_unified_job(**data)
try:
return job.launch_config
except JobLaunchConfig.DoesNotExist:
return None
return return_config
@pytest.mark.django_db
class TestConfigCreation:
"""
@@ -29,73 +40,28 @@ class TestConfigCreation:
assert config.limit == 'foobar'
assert config.char_prompts == {'limit': 'foobar'}
def test_added_related(self, full_jt, credential, default_instance_group, label):
job = full_jt.create_unified_job(credentials=[credential], instance_groups=[default_instance_group], labels=[label])
def test_added_credential(self, full_jt, credential):
job = full_jt.create_unified_job(credentials=[credential])
config = job.launch_config
assert set(config.credentials.all()) == set([credential])
assert set(config.labels.all()) == set([label])
assert set(config.instance_groups.all()) == set([default_instance_group])
def test_survey_passwords_ignored(self, inventory_source):
iu = inventory_source.create_unified_job(survey_passwords={'foo': '$encrypted$'})
assert iu.launch_config.prompts_dict() == {}
@pytest.fixture
def full_prompts_dict(inventory, credential, label, default_instance_group):
ee = ExecutionEnvironment.objects.create(name='test-ee', image='quay.io/foo/bar')
r = {
'limit': 'foobar',
'inventory': inventory,
'credentials': [credential],
'execution_environment': ee,
'labels': [label],
'instance_groups': [default_instance_group],
'verbosity': 3,
'scm_branch': 'non_dev',
'diff_mode': True,
'skip_tags': 'foobar',
'job_tags': 'untagged',
'forks': 26,
'job_slice_count': 2,
'timeout': 200,
'extra_vars': {'prompted_key': 'prompted_val'},
'job_type': 'check',
}
assert set(JobTemplate.get_ask_mapping().keys()) - set(r.keys()) == set() # make fixture comprehensive
return r
@pytest.mark.django_db
def test_config_reversibility(full_jt, full_prompts_dict):
class TestConfigReversibility:
"""
Checks that a blob of saved prompts will be re-created in the
prompts_dict for launching new jobs
"""
config = full_jt.create_unified_job(**full_prompts_dict).launch_config
assert config.prompts_dict() == full_prompts_dict
def test_char_field_only(self, config_factory):
config = config_factory({'limit': 'foobar'})
assert config.prompts_dict() == {'limit': 'foobar'}
@pytest.mark.django_db
class TestLaunchConfigModels:
def get_concrete_subclasses(self, cls):
r = []
for c in cls.__subclasses__():
if c._meta.abstract:
r.extend(self.get_concrete_subclasses(c))
else:
r.append(c)
return r
def test_non_job_config_complete(self):
"""This performs model validation which replaces code that used run on import."""
for field_name in JobTemplate.get_ask_mapping().keys():
if field_name in LaunchTimeConfigBase.SUBCLASS_FIELDS:
assert not hasattr(LaunchTimeConfigBase, field_name)
else:
assert hasattr(LaunchTimeConfigBase, field_name)
def test_subclass_fields_complete(self):
for cls in self.get_concrete_subclasses(LaunchTimeConfigBase):
for field_name in LaunchTimeConfigBase.SUBCLASS_FIELDS:
assert hasattr(cls, field_name)
def test_related_objects(self, config_factory, inventory, credential):
prompts = {'limit': 'foobar', 'inventory': inventory, 'credentials': set([credential])}
config = config_factory(prompts)
assert config.prompts_dict() == prompts

View File

@@ -252,14 +252,12 @@ class TestTaskImpact:
def test_limit_task_impact(self, job_host_limit, run_computed_fields_right_away):
job = job_host_limit(5, 2)
job.inventory.update_computed_fields()
job.task_impact = job._get_task_impact()
assert job.inventory.total_hosts == 5
assert job.task_impact == 2 + 1 # forks becomes constraint
def test_host_task_impact(self, job_host_limit, run_computed_fields_right_away):
job = job_host_limit(3, 5)
job.inventory.update_computed_fields()
job.task_impact = job._get_task_impact()
assert job.task_impact == 3 + 1 # hosts becomes constraint
def test_shard_task_impact(self, slice_job_factory, run_computed_fields_right_away):
@@ -272,13 +270,9 @@ class TestTaskImpact:
# Even distribution - all jobs run on 1 host
assert [len(jobs[0].inventory.get_script_data(slice_number=i + 1, slice_count=3)['all']['hosts']) for i in range(3)] == [1, 1, 1]
jobs[0].inventory.update_computed_fields()
for j in jobs:
j.task_impact = j._get_task_impact()
assert [job.task_impact for job in jobs] == [2, 2, 2] # plus one base task impact
# Uneven distribution - first job takes the extra host
jobs[0].inventory.hosts.create(name='remainder_foo')
assert [len(jobs[0].inventory.get_script_data(slice_number=i + 1, slice_count=3)['all']['hosts']) for i in range(3)] == [2, 1, 1]
jobs[0].inventory.update_computed_fields()
# recalculate task_impact
jobs[0].task_impact = jobs[0]._get_task_impact()
assert [job.task_impact for job in jobs] == [3, 2, 2]

View File

@@ -12,9 +12,6 @@ from awx.main.models.workflow import (
)
from awx.main.models.jobs import JobTemplate, Job
from awx.main.models.projects import ProjectUpdate
from awx.main.models.credential import Credential, CredentialType
from awx.main.models.label import Label
from awx.main.models.ha import InstanceGroup
from awx.main.scheduler.dag_workflow import WorkflowDAG
from awx.api.versioning import reverse
from awx.api.views import WorkflowJobTemplateNodeSuccessNodesList
@@ -232,65 +229,6 @@ class TestWorkflowJob:
assert queued_node.get_job_kwargs()['extra_vars'] == {'a': 42, 'b': 43}
assert queued_node.ancestor_artifacts == {'a': 42, 'b': 43}
def test_combine_prompts_WFJT_to_node(self, project, inventory, organization):
"""
Test that complex prompts like variables, credentials, labels, etc
are properly combined from the workflow-level with the node-level
"""
jt = JobTemplate.objects.create(
project=project,
inventory=inventory,
ask_variables_on_launch=True,
ask_credential_on_launch=True,
ask_instance_groups_on_launch=True,
ask_labels_on_launch=True,
ask_limit_on_launch=True,
)
wj = WorkflowJob.objects.create(name='test-wf-job', extra_vars='{}')
common_ig = InstanceGroup.objects.create(name='common')
common_ct = CredentialType.objects.create(name='common')
node = WorkflowJobNode.objects.create(workflow_job=wj, unified_job_template=jt, extra_vars={'node_key': 'node_val'})
node.limit = 'node_limit'
node.save()
node_cred_unique = Credential.objects.create(credential_type=CredentialType.objects.create(name='node'))
node_cred_conflicting = Credential.objects.create(credential_type=common_ct)
node.credentials.add(node_cred_unique, node_cred_conflicting)
node_labels = [Label.objects.create(name='node1', organization=organization), Label.objects.create(name='node2', organization=organization)]
node.labels.add(*node_labels)
node_igs = [common_ig, InstanceGroup.objects.create(name='node')]
for ig in node_igs:
node.instance_groups.add(ig)
# assertions for where node has prompts but workflow job does not
data = node.get_job_kwargs()
assert data['extra_vars'] == {'node_key': 'node_val'}
assert set(data['credentials']) == set([node_cred_conflicting, node_cred_unique])
assert data['instance_groups'] == node_igs
assert set(data['labels']) == set(node_labels)
assert data['limit'] == 'node_limit'
# add prompts to the WorkflowJob
wj.limit = 'wj_limit'
wj.extra_vars = {'wj_key': 'wj_val'}
wj.save()
wj_cred_unique = Credential.objects.create(credential_type=CredentialType.objects.create(name='wj'))
wj_cred_conflicting = Credential.objects.create(credential_type=common_ct)
wj.credentials.add(wj_cred_unique, wj_cred_conflicting)
wj.labels.add(Label.objects.create(name='wj1', organization=organization), Label.objects.create(name='wj2', organization=organization))
wj_igs = [InstanceGroup.objects.create(name='wj'), common_ig]
for ig in wj_igs:
wj.instance_groups.add(ig)
# assertions for behavior where node and workflow jobs have prompts
data = node.get_job_kwargs()
assert data['extra_vars'] == {'node_key': 'node_val', 'wj_key': 'wj_val'}
assert set(data['credentials']) == set([wj_cred_unique, wj_cred_conflicting, node_cred_unique])
assert data['instance_groups'] == wj_igs
assert set(data['labels']) == set(node_labels) # as exception, WFJT labels not applied
assert data['limit'] == 'wj_limit'
@pytest.mark.django_db
class TestWorkflowJobTemplate:
@@ -349,25 +287,12 @@ class TestWorkflowJobTemplatePrompts:
@pytest.fixture
def wfjt_prompts(self):
return WorkflowJobTemplate.objects.create(
ask_variables_on_launch=True,
ask_inventory_on_launch=True,
ask_tags_on_launch=True,
ask_labels_on_launch=True,
ask_limit_on_launch=True,
ask_scm_branch_on_launch=True,
ask_skip_tags_on_launch=True,
ask_inventory_on_launch=True, ask_variables_on_launch=True, ask_limit_on_launch=True, ask_scm_branch_on_launch=True
)
@pytest.fixture
def prompts_data(self, inventory):
return dict(
inventory=inventory,
extra_vars={'foo': 'bar'},
limit='webservers',
scm_branch='release-3.3',
job_tags='foo',
skip_tags='bar',
)
return dict(inventory=inventory, extra_vars={'foo': 'bar'}, limit='webservers', scm_branch='release-3.3')
def test_apply_workflow_job_prompts(self, workflow_job_template, wfjt_prompts, prompts_data, inventory):
# null or empty fields used
@@ -375,9 +300,6 @@ class TestWorkflowJobTemplatePrompts:
assert workflow_job.limit is None
assert workflow_job.inventory is None
assert workflow_job.scm_branch is None
assert workflow_job.job_tags is None
assert workflow_job.skip_tags is None
assert len(workflow_job.labels.all()) is 0
# fields from prompts used
workflow_job = workflow_job_template.create_unified_job(**prompts_data)
@@ -385,21 +307,15 @@ class TestWorkflowJobTemplatePrompts:
assert workflow_job.limit == 'webservers'
assert workflow_job.inventory == inventory
assert workflow_job.scm_branch == 'release-3.3'
assert workflow_job.job_tags == 'foo'
assert workflow_job.skip_tags == 'bar'
# non-null fields from WFJT used
workflow_job_template.inventory = inventory
workflow_job_template.limit = 'fooo'
workflow_job_template.scm_branch = 'bar'
workflow_job_template.job_tags = 'baz'
workflow_job_template.skip_tags = 'dinosaur'
workflow_job = workflow_job_template.create_unified_job()
assert workflow_job.limit == 'fooo'
assert workflow_job.inventory == inventory
assert workflow_job.scm_branch == 'bar'
assert workflow_job.job_tags == 'baz'
assert workflow_job.skip_tags == 'dinosaur'
@pytest.mark.django_db
def test_process_workflow_job_prompts(self, inventory, workflow_job_template, wfjt_prompts, prompts_data):
@@ -424,19 +340,12 @@ class TestWorkflowJobTemplatePrompts:
ask_limit_on_launch=True,
scm_branch='bar',
ask_scm_branch_on_launch=True,
job_tags='foo',
skip_tags='bar',
),
user=org_admin,
expect=201,
)
wfjt = WorkflowJobTemplate.objects.get(id=r.data['id'])
assert wfjt.char_prompts == {
'limit': 'foooo',
'scm_branch': 'bar',
'job_tags': 'foo',
'skip_tags': 'bar',
}
assert wfjt.char_prompts == {'limit': 'foooo', 'scm_branch': 'bar'}
assert wfjt.ask_scm_branch_on_launch is True
assert wfjt.ask_limit_on_launch is True
@@ -446,67 +355,6 @@ class TestWorkflowJobTemplatePrompts:
assert r.data['limit'] == 'prompt_limit'
assert r.data['scm_branch'] == 'prompt_branch'
@pytest.mark.django_db
def test_set_all_ask_for_prompts_false_from_post(self, post, organization, inventory, org_admin):
    '''
    POST a workflow job template without any ask_*_on_launch flags and
    verify that every prompt flag on the stored template is False.
    '''
    list_url = reverse('api:workflow_job_template_list')
    payload = {
        'name': 'workflow that tests ask_for prompts',
        'organization': organization.id,
        'inventory': inventory.id,
        'job_tags': '',
        'skip_tags': '',
    }
    response = post(url=list_url, data=payload, user=org_admin, expect=201)

    created = WorkflowJobTemplate.objects.get(id=response.data['id'])
    # None of the prompt flags were sent, so each one must be exactly False.
    prompt_flags = (
        'ask_inventory_on_launch',
        'ask_labels_on_launch',
        'ask_limit_on_launch',
        'ask_scm_branch_on_launch',
        'ask_skip_tags_on_launch',
        'ask_tags_on_launch',
        'ask_variables_on_launch',
    )
    for flag in prompt_flags:
        assert getattr(created, flag) is False
@pytest.mark.django_db
def test_set_all_ask_for_prompts_true_from_post(self, post, organization, inventory, org_admin):
    '''
    POST a workflow job template with every ask_*_on_launch flag set to True
    and verify that each flag is stored as True on the created template.
    '''
    prompt_flags = (
        'ask_inventory_on_launch',
        'ask_labels_on_launch',
        'ask_limit_on_launch',
        'ask_scm_branch_on_launch',
        'ask_skip_tags_on_launch',
        'ask_tags_on_launch',
        'ask_variables_on_launch',
    )
    list_url = reverse('api:workflow_job_template_list')
    payload = {
        'name': 'workflow that tests ask_for prompts',
        'organization': organization.id,
        'inventory': inventory.id,
        'job_tags': '',
        'skip_tags': '',
    }
    # Enable every prompt flag in the request body.
    payload.update(dict.fromkeys(prompt_flags, True))
    response = post(url=list_url, data=payload, user=org_admin, expect=201)

    created = WorkflowJobTemplate.objects.get(id=response.data['id'])
    for flag in prompt_flags:
        assert getattr(created, flag) is True
@pytest.mark.django_db
def test_workflow_ancestors(organization):

View File

@@ -1,6 +0,0 @@
def create_job(jt, dependencies_processed=True):
    """Create a pending job from the given template and save it.

    The job is built with ``jt.create_unified_job()``, its status is set to
    ``"pending"`` and its ``dependencies_processed`` flag to the supplied
    value, then it is saved and returned.
    """
    unified_job = jt.create_unified_job()
    unified_job.dependencies_processed = dependencies_processed
    unified_job.status = "pending"
    unified_job.save()
    return unified_job

View File

@@ -1,10 +1,9 @@
import pytest
from unittest import mock
from datetime import timedelta
from awx.main.scheduler import TaskManager, DependencyManager
from awx.main.models import InstanceGroup
from awx.main.scheduler import TaskManager
from awx.main.models import InstanceGroup, WorkflowJob
from awx.main.tasks.system import apply_cluster_membership_policies
from . import create_job
@pytest.mark.django_db
@@ -13,12 +12,16 @@ def test_multi_group_basic_job_launch(instance_factory, controlplane_instance_gr
i2 = instance_factory("i2")
ig1 = instance_group_factory("ig1", instances=[i1])
ig2 = instance_group_factory("ig2", instances=[i2])
objects1 = job_template_factory('jt1', organization='org1', project='proj1', inventory='inv1', credential='cred1')
objects1 = job_template_factory('jt1', organization='org1', project='proj1', inventory='inv1', credential='cred1', jobs=["job_should_start"])
objects1.job_template.instance_groups.add(ig1)
j1 = create_job(objects1.job_template)
objects2 = job_template_factory('jt2', organization='org2', project='proj2', inventory='inv2', credential='cred2')
j1 = objects1.jobs['job_should_start']
j1.status = 'pending'
j1.save()
objects2 = job_template_factory('jt2', organization='org2', project='proj2', inventory='inv2', credential='cred2', jobs=["job_should_still_start"])
objects2.job_template.instance_groups.add(ig2)
j2 = create_job(objects2.job_template)
j2 = objects2.jobs['job_should_still_start']
j2.status = 'pending'
j2.save()
with mock.patch('awx.main.models.Job.task_impact', new_callable=mock.PropertyMock) as mock_task_impact:
mock_task_impact.return_value = 500
with mocker.patch("awx.main.scheduler.TaskManager.start_task"):
@@ -32,26 +35,23 @@ def test_multi_group_with_shared_dependency(instance_factory, controlplane_insta
i2 = instance_factory("i2")
ig1 = instance_group_factory("ig1", instances=[i1])
ig2 = instance_group_factory("ig2", instances=[i2])
objects1 = job_template_factory(
'jt1',
organization='org1',
project='proj1',
inventory='inv1',
credential='cred1',
)
objects1 = job_template_factory('jt1', organization='org1', project='proj1', inventory='inv1', credential='cred1', jobs=["job_should_start"])
objects1.job_template.instance_groups.add(ig1)
j1 = create_job(objects1.job_template, dependencies_processed=False)
p = objects1.project
p.scm_update_on_launch = True
p.scm_update_cache_timeout = 0
p.scm_type = "git"
p.scm_url = "http://github.com/ansible/ansible.git"
p.save()
objects2 = job_template_factory('jt2', organization=objects1.organization, project=p, inventory='inv2', credential='cred2')
j1 = objects1.jobs['job_should_start']
j1.status = 'pending'
j1.save()
objects2 = job_template_factory('jt2', organization=objects1.organization, project=p, inventory='inv2', credential='cred2', jobs=["job_should_still_start"])
objects2.job_template.instance_groups.add(ig2)
j2 = create_job(objects2.job_template, dependencies_processed=False)
j2 = objects2.jobs['job_should_still_start']
j2.status = 'pending'
j2.save()
with mocker.patch("awx.main.scheduler.TaskManager.start_task"):
DependencyManager().schedule()
TaskManager().schedule()
pu = p.project_updates.first()
TaskManager.start_task.assert_called_once_with(pu, controlplane_instance_group, [j1, j2], controlplane_instance_group.instances.all()[0])
@@ -59,7 +59,6 @@ def test_multi_group_with_shared_dependency(instance_factory, controlplane_insta
pu.status = "successful"
pu.save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
DependencyManager().schedule()
TaskManager().schedule()
TaskManager.start_task.assert_any_call(j1, ig1, [], i1)
@@ -70,7 +69,7 @@ def test_multi_group_with_shared_dependency(instance_factory, controlplane_insta
@pytest.mark.django_db
def test_workflow_job_no_instancegroup(workflow_job_template_factory, controlplane_instance_group, mocker):
wfjt = workflow_job_template_factory('anicedayforawalk').workflow_job_template
wfj = wfjt.create_unified_job()
wfj = WorkflowJob.objects.create(workflow_job_template=wfjt)
wfj.status = "pending"
wfj.save()
with mocker.patch("awx.main.scheduler.TaskManager.start_task"):
@@ -86,50 +85,39 @@ def test_overcapacity_blocking_other_groups_unaffected(instance_factory, control
i1.capacity = 1020
i1.save()
i2 = instance_factory("i2")
i2.capacity = 1020
i2.save()
ig1 = instance_group_factory("ig1", instances=[i1])
ig2 = instance_group_factory("ig2", instances=[i2])
objects1 = job_template_factory('jt1', organization='org1', project='proj1', inventory='inv1', credential='cred1')
objects1 = job_template_factory('jt1', organization='org1', project='proj1', inventory='inv1', credential='cred1', jobs=["job_should_start"])
objects1.job_template.instance_groups.add(ig1)
j1 = create_job(objects1.job_template)
objects2 = job_template_factory('jt2', organization=objects1.organization, project='proj2', inventory='inv2', credential='cred2')
j1 = objects1.jobs['job_should_start']
j1.status = 'pending'
j1.save()
objects2 = job_template_factory(
'jt2', organization=objects1.organization, project='proj2', inventory='inv2', credential='cred2', jobs=["job_should_start", "job_should_also_start"]
)
objects2.job_template.instance_groups.add(ig1)
j1_1 = create_job(objects2.job_template)
objects3 = job_template_factory('jt3', organization='org2', project='proj3', inventory='inv3', credential='cred3')
j1_1 = objects2.jobs['job_should_also_start']
j1_1.status = 'pending'
j1_1.save()
objects3 = job_template_factory('jt3', organization='org2', project='proj3', inventory='inv3', credential='cred3', jobs=["job_should_still_start"])
objects3.job_template.instance_groups.add(ig2)
j2 = create_job(objects3.job_template)
objects4 = job_template_factory('jt4', organization=objects3.organization, project='proj4', inventory='inv4', credential='cred4')
j2 = objects3.jobs['job_should_still_start']
j2.status = 'pending'
j2.save()
objects4 = job_template_factory(
'jt4', organization=objects3.organization, project='proj4', inventory='inv4', credential='cred4', jobs=["job_should_not_start"]
)
objects4.job_template.instance_groups.add(ig2)
j2_1 = create_job(objects4.job_template)
j2_1 = objects4.jobs['job_should_not_start']
j2_1.status = 'pending'
j2_1.save()
tm = TaskManager()
with mock.patch('awx.main.models.Job.task_impact', new_callable=mock.PropertyMock) as mock_task_impact:
mock_task_impact.return_value = 500
TaskManager().schedule()
# all jobs should be able to run, plenty of capacity across both instances
for j in [j1, j1_1, j2, j2_1]:
j.refresh_from_db()
assert j.status == "waiting"
# reset to pending
for j in [j1, j1_1, j2, j2_1]:
j.status = "pending"
j.save()
# reduce i2's capacity so that it can only fit 1 job
i2.capacity = 510
i2.save()
TaskManager().schedule()
for j in [j1, j1_1, j2]:
j.refresh_from_db()
assert j.status == "waiting"
j2_1.refresh_from_db()
# could not run because i2 is full
assert j2_1.status == "pending"
with mock.patch.object(TaskManager, "start_task", wraps=tm.start_task) as mock_job:
tm.schedule()
mock_job.assert_has_calls([mock.call(j1, ig1, [], i1), mock.call(j1_1, ig1, [], i1), mock.call(j2, ig2, [], i2)])
assert mock_job.call_count == 3
@pytest.mark.django_db
@@ -138,13 +126,19 @@ def test_failover_group_run(instance_factory, controlplane_instance_group, mocke
i2 = instance_factory("i2")
ig1 = instance_group_factory("ig1", instances=[i1])
ig2 = instance_group_factory("ig2", instances=[i2])
objects1 = job_template_factory('jt1', organization='org1', project='proj1', inventory='inv1', credential='cred1')
objects1 = job_template_factory('jt1', organization='org1', project='proj1', inventory='inv1', credential='cred1', jobs=["job_should_start"])
objects1.job_template.instance_groups.add(ig1)
j1 = create_job(objects1.job_template)
objects2 = job_template_factory('jt2', organization=objects1.organization, project='proj2', inventory='inv2', credential='cred2')
j1 = objects1.jobs['job_should_start']
j1.status = 'pending'
j1.save()
objects2 = job_template_factory(
'jt2', organization=objects1.organization, project='proj2', inventory='inv2', credential='cred2', jobs=["job_should_start", "job_should_also_start"]
)
objects2.job_template.instance_groups.add(ig1)
objects2.job_template.instance_groups.add(ig2)
j1_1 = create_job(objects2.job_template)
j1_1 = objects2.jobs['job_should_also_start']
j1_1.status = 'pending'
j1_1.save()
tm = TaskManager()
with mock.patch('awx.main.models.Job.task_impact', new_callable=mock.PropertyMock) as mock_task_impact:
mock_task_impact.return_value = 500

View File

@@ -3,19 +3,21 @@ from unittest import mock
import json
from datetime import timedelta
from awx.main.scheduler import TaskManager, DependencyManager, WorkflowManager
from awx.main.scheduler import TaskManager
from awx.main.scheduler.dependency_graph import DependencyGraph
from awx.main.utils import encrypt_field
from awx.main.models import WorkflowJobTemplate, JobTemplate, Job
from awx.main.models.ha import Instance
from . import create_job
from django.conf import settings
@pytest.mark.django_db
def test_single_job_scheduler_launch(hybrid_instance, controlplane_instance_group, job_template_factory, mocker):
instance = controlplane_instance_group.instances.all()[0]
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred')
j = create_job(objects.job_template)
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred', jobs=["job_should_start"])
j = objects.jobs["job_should_start"]
j.status = 'pending'
j.save()
with mocker.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(j, controlplane_instance_group, [], instance)
@@ -30,8 +32,10 @@ class TestJobLifeCycle:
expect_commit - list of expected on_commit calls
If any of these are None, then the assertion is not made.
"""
if expect_schedule and len(expect_schedule) > 1:
raise RuntimeError('Task manager should reschedule itself one time, at most.')
with mock.patch('awx.main.models.unified_jobs.UnifiedJob.websocket_emit_status') as mock_channel:
with mock.patch('awx.main.utils.common.ScheduleManager._schedule') as tm_sch:
with mock.patch('awx.main.utils.common._schedule_task_manager') as tm_sch:
# Jobs are ultimately submitted in the on_commit hook, but this will not
# actually run, because it waits until outer transaction, which is the test
# itself in this case
@@ -52,21 +56,22 @@ class TestJobLifeCycle:
wj = wfjt.create_unified_job()
assert wj.workflow_nodes.count() == 2
wj.signal_start()
tm = TaskManager()
# Transitions workflow job to running
# needs to re-schedule so it spawns jobs next round
self.run_tm(TaskManager(), [mock.call('running')])
self.run_tm(tm, [mock.call('running')], [mock.call()])
# Spawns jobs
# needs re-schedule to submit jobs next round
self.run_tm(WorkflowManager(), [mock.call('pending'), mock.call('pending')])
self.run_tm(tm, [mock.call('pending'), mock.call('pending')], [mock.call()])
assert jt.jobs.count() == 2 # task manager spawned jobs
# Submits jobs
# intermission - jobs will run and reschedule TM when finished
self.run_tm(DependencyManager()) # flip dependencies_processed to True
self.run_tm(TaskManager())
self.run_tm(tm, [mock.call('waiting'), mock.call('waiting')], [])
# I am the job runner
for job in jt.jobs.all():
job.status = 'successful'
@@ -74,7 +79,7 @@ class TestJobLifeCycle:
# Finishes workflow
# no further action is necessary, so rescheduling should not happen
self.run_tm(WorkflowManager(), [mock.call('successful')])
self.run_tm(tm, [mock.call('successful')], [])
def test_task_manager_workflow_workflow_rescheduling(self, controlplane_instance_group):
wfjts = [WorkflowJobTemplate.objects.create(name='foo')]
@@ -85,13 +90,16 @@ class TestJobLifeCycle:
wj = wfjts[0].create_unified_job()
wj.signal_start()
tm = TaskManager()
attempts = 10
while wfjts[0].status != 'successful' and attempts > 0:
self.run_tm(TaskManager())
self.run_tm(WorkflowManager())
while wfjts[0].status != 'successful':
wfjts[1].refresh_from_db()
if wfjts[1].status == 'successful':
# final run, no more work to do
self.run_tm(tm, expect_schedule=[])
else:
self.run_tm(tm, expect_schedule=[mock.call()])
wfjts[0].refresh_from_db()
attempts -= 1
def test_control_and_execution_instance(self, project, system_job_template, job_template, inventory_source, control_instance, execution_instance):
assert Instance.objects.count() == 2
@@ -105,7 +113,6 @@ class TestJobLifeCycle:
for uj in all_ujs:
uj.signal_start()
DependencyManager().schedule()
tm = TaskManager()
self.run_tm(tm)
@@ -128,7 +135,6 @@ class TestJobLifeCycle:
for uj in all_ujs:
uj.signal_start()
DependencyManager().schedule()
# There is only enough control capacity to run one of the jobs so one should end up in pending and the other in waiting
tm = TaskManager()
self.run_tm(tm)
@@ -151,7 +157,6 @@ class TestJobLifeCycle:
for uj in all_ujs:
uj.signal_start()
DependencyManager().schedule()
# There is only enough control capacity to run one of the jobs so one should end up in pending and the other in waiting
tm = TaskManager()
self.run_tm(tm)
@@ -192,49 +197,63 @@ class TestJobLifeCycle:
@pytest.mark.django_db
def test_single_jt_multi_job_launch_blocks_last(job_template_factory):
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred')
j1 = create_job(objects.job_template)
j2 = create_job(objects.job_template)
TaskManager().schedule()
j1.refresh_from_db()
j2.refresh_from_db()
assert j1.status == "waiting"
assert j2.status == "pending"
# mimic running j1 to unblock j2
j1.status = "successful"
def test_single_jt_multi_job_launch_blocks_last(controlplane_instance_group, job_template_factory, mocker):
instance = controlplane_instance_group.instances.all()[0]
objects = job_template_factory(
'jt', organization='org1', project='proj', inventory='inv', credential='cred', jobs=["job_should_start", "job_should_not_start"]
)
j1 = objects.jobs["job_should_start"]
j1.status = 'pending'
j1.save()
TaskManager().schedule()
j2.refresh_from_db()
assert j2.status == "waiting"
j2 = objects.jobs["job_should_not_start"]
j2.status = 'pending'
j2.save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(j1, controlplane_instance_group, [], instance)
j1.status = "successful"
j1.save()
with mocker.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(j2, controlplane_instance_group, [], instance)
@pytest.mark.django_db
def test_single_jt_multi_job_launch_allow_simul_allowed(job_template_factory):
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred')
def test_single_jt_multi_job_launch_allow_simul_allowed(controlplane_instance_group, job_template_factory, mocker):
instance = controlplane_instance_group.instances.all()[0]
objects = job_template_factory(
'jt', organization='org1', project='proj', inventory='inv', credential='cred', jobs=["job_should_start", "job_should_not_start"]
)
jt = objects.job_template
jt.allow_simultaneous = True
jt.save()
j1 = create_job(objects.job_template)
j2 = create_job(objects.job_template)
TaskManager().schedule()
j1.refresh_from_db()
j2.refresh_from_db()
assert j1.status == "waiting"
assert j2.status == "waiting"
j1 = objects.jobs["job_should_start"]
j1.allow_simultaneous = True
j1.status = 'pending'
j1.save()
j2 = objects.jobs["job_should_not_start"]
j2.allow_simultaneous = True
j2.status = 'pending'
j2.save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_has_calls(
[mock.call(j1, controlplane_instance_group, [], instance), mock.call(j2, controlplane_instance_group, [], instance)]
)
@pytest.mark.django_db
def test_multi_jt_capacity_blocking(hybrid_instance, job_template_factory, mocker):
instance = hybrid_instance
controlplane_instance_group = instance.rampart_groups.first()
objects1 = job_template_factory('jt1', organization='org1', project='proj1', inventory='inv1', credential='cred1')
objects2 = job_template_factory('jt2', organization='org2', project='proj2', inventory='inv2', credential='cred2')
j1 = create_job(objects1.job_template)
j2 = create_job(objects2.job_template)
objects1 = job_template_factory('jt1', organization='org1', project='proj1', inventory='inv1', credential='cred1', jobs=["job_should_start"])
objects2 = job_template_factory('jt2', organization='org2', project='proj2', inventory='inv2', credential='cred2', jobs=["job_should_not_start"])
j1 = objects1.jobs["job_should_start"]
j1.status = 'pending'
j1.save()
j2 = objects2.jobs["job_should_not_start"]
j2.status = 'pending'
j2.save()
tm = TaskManager()
with mock.patch('awx.main.models.Job.task_impact', new_callable=mock.PropertyMock) as mock_task_impact:
mock_task_impact.return_value = 505
@@ -250,9 +269,11 @@ def test_multi_jt_capacity_blocking(hybrid_instance, job_template_factory, mocke
@pytest.mark.django_db
def test_single_job_dependencies_project_launch(controlplane_instance_group, job_template_factory, mocker):
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred')
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred', jobs=["job_should_start"])
instance = controlplane_instance_group.instances.all()[0]
j = create_job(objects.job_template, dependencies_processed=False)
j = objects.jobs["job_should_start"]
j.status = 'pending'
j.save()
p = objects.project
p.scm_update_on_launch = True
p.scm_update_cache_timeout = 0
@@ -260,13 +281,12 @@ def test_single_job_dependencies_project_launch(controlplane_instance_group, job
p.scm_url = "http://github.com/ansible/ansible.git"
p.save(skip_update=True)
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
dm = DependencyManager()
with mock.patch.object(DependencyManager, "create_project_update", wraps=dm.create_project_update) as mock_pu:
dm.schedule()
tm = TaskManager()
with mock.patch.object(TaskManager, "create_project_update", wraps=tm.create_project_update) as mock_pu:
tm.schedule()
mock_pu.assert_called_once_with(j)
pu = [x for x in p.project_updates.all()]
assert len(pu) == 1
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(pu[0], controlplane_instance_group, [j], instance)
pu[0].status = "successful"
pu[0].save()
@@ -277,9 +297,11 @@ def test_single_job_dependencies_project_launch(controlplane_instance_group, job
@pytest.mark.django_db
def test_single_job_dependencies_inventory_update_launch(controlplane_instance_group, job_template_factory, mocker, inventory_source_factory):
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred')
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred', jobs=["job_should_start"])
instance = controlplane_instance_group.instances.all()[0]
j = create_job(objects.job_template, dependencies_processed=False)
j = objects.jobs["job_should_start"]
j.status = 'pending'
j.save()
i = objects.inventory
ii = inventory_source_factory("ec2")
ii.source = "ec2"
@@ -288,13 +310,12 @@ def test_single_job_dependencies_inventory_update_launch(controlplane_instance_g
ii.save()
i.inventory_sources.add(ii)
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
dm = DependencyManager()
with mock.patch.object(DependencyManager, "create_inventory_update", wraps=dm.create_inventory_update) as mock_iu:
dm.schedule()
tm = TaskManager()
with mock.patch.object(TaskManager, "create_inventory_update", wraps=tm.create_inventory_update) as mock_iu:
tm.schedule()
mock_iu.assert_called_once_with(j, ii)
iu = [x for x in ii.inventory_updates.all()]
assert len(iu) == 1
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(iu[0], controlplane_instance_group, [j], instance)
iu[0].status = "successful"
iu[0].save()
@@ -313,17 +334,19 @@ def test_inventory_update_launches_project_update(controlplane_instance_group, s
iu.status = "pending"
iu.save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
dm = DependencyManager()
with mock.patch.object(DependencyManager, "create_project_update", wraps=dm.create_project_update) as mock_pu:
dm.schedule()
tm = TaskManager()
with mock.patch.object(TaskManager, "create_project_update", wraps=tm.create_project_update) as mock_pu:
tm.schedule()
mock_pu.assert_called_with(iu, project_id=project.id)
@pytest.mark.django_db
def test_job_dependency_with_already_updated(controlplane_instance_group, job_template_factory, mocker, inventory_source_factory):
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred')
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred', jobs=["job_should_start"])
instance = controlplane_instance_group.instances.all()[0]
j = create_job(objects.job_template, dependencies_processed=False)
j = objects.jobs["job_should_start"]
j.status = 'pending'
j.save()
i = objects.inventory
ii = inventory_source_factory("ec2")
ii.source = "ec2"
@@ -336,9 +359,9 @@ def test_job_dependency_with_already_updated(controlplane_instance_group, job_te
j.start_args = encrypt_field(j, field_name="start_args")
j.save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
dm = DependencyManager()
with mock.patch.object(DependencyManager, "create_inventory_update", wraps=dm.create_inventory_update) as mock_iu:
dm.schedule()
tm = TaskManager()
with mock.patch.object(TaskManager, "create_inventory_update", wraps=tm.create_inventory_update) as mock_iu:
tm.schedule()
mock_iu.assert_not_called()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
@@ -348,11 +371,13 @@ def test_job_dependency_with_already_updated(controlplane_instance_group, job_te
@pytest.mark.django_db
def test_shared_dependencies_launch(controlplane_instance_group, job_template_factory, mocker, inventory_source_factory):
instance = controlplane_instance_group.instances.all()[0]
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred')
objects.job_template.allow_simultaneous = True
objects.job_template.save()
j1 = create_job(objects.job_template, dependencies_processed=False)
j2 = create_job(objects.job_template, dependencies_processed=False)
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred', jobs=["first_job", "second_job"])
j1 = objects.jobs["first_job"]
j1.status = 'pending'
j1.save()
j2 = objects.jobs["second_job"]
j2.status = 'pending'
j2.save()
p = objects.project
p.scm_update_on_launch = True
p.scm_update_cache_timeout = 300
@@ -367,8 +392,8 @@ def test_shared_dependencies_launch(controlplane_instance_group, job_template_fa
ii.update_cache_timeout = 300
ii.save()
i.inventory_sources.add(ii)
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
DependencyManager().schedule()
TaskManager().schedule()
pu = p.project_updates.first()
iu = ii.inventory_updates.first()
@@ -383,9 +408,12 @@ def test_shared_dependencies_launch(controlplane_instance_group, job_template_fa
iu.save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_has_calls(
[mock.call(j1, controlplane_instance_group, [], instance), mock.call(j2, controlplane_instance_group, [], instance)]
)
TaskManager.start_task.assert_called_once_with(j1, controlplane_instance_group, [], instance)
j1.status = "successful"
j1.save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(j2, controlplane_instance_group, [], instance)
pu = [x for x in p.project_updates.all()]
iu = [x for x in ii.inventory_updates.all()]
assert len(pu) == 1
@@ -394,27 +422,6 @@ def test_shared_dependencies_launch(controlplane_instance_group, job_template_fa
@pytest.mark.django_db
def test_job_not_blocking_project_update(controlplane_instance_group, job_template_factory):
    '''
    A running job must not prevent a pending project update of the same
    project from being started by the task manager.
    '''
    instance = controlplane_instance_group.instances.all()[0]
    objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred')
    job = objects.job_template.create_unified_job()
    job.instance_group = controlplane_instance_group
    # The field is `dependencies_processed`; the previous spelling
    # (`dependencies_process`) only set a throwaway attribute that save() ignored.
    job.dependencies_processed = True
    job.status = "running"
    job.save()

    with mock.patch("awx.main.scheduler.TaskManager.start_task"):
        proj = objects.project
        project_update = proj.create_project_update()
        project_update.instance_group = controlplane_instance_group
        project_update.status = "pending"
        project_update.save()
        TaskManager().schedule()
        # Assert inside the patch context: start_task is only a mock here.
        TaskManager.start_task.assert_called_once_with(project_update, controlplane_instance_group, [], instance)
@pytest.mark.django_db
def test_job_not_blocking_inventory_update(controlplane_instance_group, job_template_factory, inventory_source_factory):
instance = controlplane_instance_group.instances.all()[0]
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred', jobs=["job"])
job = objects.jobs["job"]
job.instance_group = controlplane_instance_group
@@ -422,6 +429,33 @@ def test_job_not_blocking_inventory_update(controlplane_instance_group, job_temp
job.save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
task_manager = TaskManager()
task_manager._schedule()
proj = objects.project
project_update = proj.create_project_update()
project_update.instance_group = controlplane_instance_group
project_update.status = "pending"
project_update.save()
assert not task_manager.job_blocked_by(project_update)
dependency_graph = DependencyGraph()
dependency_graph.add_job(job)
assert not dependency_graph.task_blocked_by(project_update)
@pytest.mark.django_db
def test_job_not_blocking_inventory_update(controlplane_instance_group, job_template_factory, inventory_source_factory):
objects = job_template_factory('jt', organization='org1', project='proj', inventory='inv', credential='cred', jobs=["job"])
job = objects.jobs["job"]
job.instance_group = controlplane_instance_group
job.status = "running"
job.save()
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
task_manager = TaskManager()
task_manager._schedule()
inv = objects.inventory
inv_source = inventory_source_factory("ec2")
inv_source.source = "ec2"
@@ -431,9 +465,11 @@ def test_job_not_blocking_inventory_update(controlplane_instance_group, job_temp
inventory_update.status = "pending"
inventory_update.save()
DependencyManager().schedule()
TaskManager().schedule()
TaskManager.start_task.assert_called_once_with(inventory_update, controlplane_instance_group, [], instance)
assert not task_manager.job_blocked_by(inventory_update)
dependency_graph = DependencyGraph()
dependency_graph.add_job(job)
assert not dependency_graph.task_blocked_by(inventory_update)
@pytest.mark.django_db
@@ -448,7 +484,7 @@ def test_generate_dependencies_only_once(job_template_factory):
# job starts with dependencies_processed as False
assert not job.dependencies_processed
# run one cycle of ._schedule() to generate dependencies
DependencyManager().schedule()
TaskManager()._schedule()
# make sure dependencies_processed is now True
job = Job.objects.filter(name="job_gen_dep")[0]
@@ -456,7 +492,7 @@ def test_generate_dependencies_only_once(job_template_factory):
# Run ._schedule() again, but make sure .generate_dependencies() is not
# called with job in the argument list
dm = DependencyManager()
dm.generate_dependencies = mock.MagicMock(return_value=[])
dm.schedule()
dm.generate_dependencies.assert_not_called()
tm = TaskManager()
tm.generate_dependencies = mock.MagicMock(return_value=[])
tm._schedule()
tm.generate_dependencies.assert_has_calls([mock.call([]), mock.call([])])

View File

@@ -6,20 +6,12 @@ from awx.main.utils import decrypt_field
from awx.main.models.workflow import WorkflowJobTemplate, WorkflowJobTemplateNode, WorkflowApprovalTemplate
from awx.main.models.jobs import JobTemplate
from awx.main.tasks.system import deep_copy_model_obj
from awx.main.models import Label, ExecutionEnvironment, InstanceGroup
@pytest.mark.django_db
def test_job_template_copy(
post, get, project, inventory, machine_credential, vault_credential, credential, alice, job_template_with_survey_passwords, admin, organization
):
label = Label.objects.create(name="foobar", organization=organization)
ig = InstanceGroup.objects.create(name="bazbar", organization=organization)
def test_job_template_copy(post, get, project, inventory, machine_credential, vault_credential, credential, alice, job_template_with_survey_passwords, admin):
job_template_with_survey_passwords.project = project
job_template_with_survey_passwords.inventory = inventory
job_template_with_survey_passwords.labels.add(label)
job_template_with_survey_passwords.instance_groups.add(ig)
job_template_with_survey_passwords.prevent_instance_group_fallback = True
job_template_with_survey_passwords.save()
job_template_with_survey_passwords.credentials.add(credential)
job_template_with_survey_passwords.credentials.add(machine_credential)
@@ -62,11 +54,6 @@ def test_job_template_copy(
assert vault_credential in jt_copy.credentials.all()
assert machine_credential in jt_copy.credentials.all()
assert job_template_with_survey_passwords.survey_spec == jt_copy.survey_spec
assert jt_copy.labels.count() != 0
assert jt_copy.labels.get(pk=label.pk) == label
assert jt_copy.instance_groups.count() != 0
assert jt_copy.instance_groups.get(pk=ig.pk) == ig
assert jt_copy.prevent_instance_group_fallback == True
@pytest.mark.django_db
@@ -97,8 +84,6 @@ def test_inventory_copy(inventory, group_factory, post, get, alice, organization
host = group_1_1.hosts.create(name='host', inventory=inventory)
group_2_1.hosts.add(host)
inventory.admin_role.members.add(alice)
inventory.prevent_instance_group_fallback = True
inventory.save()
assert get(reverse('api:inventory_copy', kwargs={'pk': inventory.pk}), alice, expect=200).data['can_copy'] is False
inventory.organization.admin_role.members.add(alice)
assert get(reverse('api:inventory_copy', kwargs={'pk': inventory.pk}), alice, expect=200).data['can_copy'] is True
@@ -114,7 +99,6 @@ def test_inventory_copy(inventory, group_factory, post, get, alice, organization
assert inventory_copy.organization == organization
assert inventory_copy.created_by == alice
assert inventory_copy.name == 'new inv name'
assert inventory_copy.prevent_instance_group_fallback == True
assert set(group_1_1_copy.parents.all()) == set()
assert set(group_2_1_copy.parents.all()) == set([group_1_1_copy])
assert set(group_2_2_copy.parents.all()) == set([group_1_1_copy, group_2_1_copy])
@@ -125,22 +109,8 @@ def test_inventory_copy(inventory, group_factory, post, get, alice, organization
@pytest.mark.django_db
def test_workflow_job_template_copy(workflow_job_template, post, get, admin, organization):
'''
Tests the FIELDS_TO_PRESERVE_AT_COPY attribute on WFJTs
'''
workflow_job_template.organization = organization
label = Label.objects.create(name="foobar", organization=organization)
workflow_job_template.labels.add(label)
ee = ExecutionEnvironment.objects.create(name="barfoo", organization=organization)
workflow_job_template.execution_environment = ee
ig = InstanceGroup.objects.create(name="bazbar", organization=organization)
workflow_job_template.instance_groups.add(ig)
workflow_job_template.save()
jts = [JobTemplate.objects.create(name='test-jt-{}'.format(i)) for i in range(0, 5)]
nodes = [WorkflowJobTemplateNode.objects.create(workflow_job_template=workflow_job_template, unified_job_template=jts[i]) for i in range(0, 5)]
nodes[0].success_nodes.add(nodes[1])
@@ -154,16 +124,9 @@ def test_workflow_job_template_copy(workflow_job_template, post, get, admin, org
wfjt_copy = type(workflow_job_template).objects.get(pk=wfjt_copy_id)
args, kwargs = deep_copy_mock.call_args
deep_copy_model_obj(*args, **kwargs)
assert wfjt_copy.organization == organization
assert wfjt_copy.created_by == admin
assert wfjt_copy.name == 'new wfjt name'
assert wfjt_copy.labels.count() != 0
assert wfjt_copy.labels.get(pk=label.pk) == label
assert wfjt_copy.execution_environment == ee
assert wfjt_copy.instance_groups.count() != 0
assert wfjt_copy.instance_groups.get(pk=ig.pk) == ig
copied_node_list = [x for x in wfjt_copy.workflow_job_template_nodes.all()]
copied_node_list.sort(key=lambda x: int(x.unified_job_template.name[-1]))
for node, success_count, failure_count, always_count in zip(copied_node_list, [1, 1, 0, 0, 0], [1, 0, 0, 1, 0], [0, 0, 0, 0, 0]):

View File

@@ -74,37 +74,34 @@ GLqbpJyX2r3p/Rmo6mLY71SqpA==
@pytest.mark.django_db
def test_default_cred_types():
assert sorted(CredentialType.defaults.keys()) == sorted(
[
'aim',
'aws',
'azure_kv',
'azure_rm',
'centrify_vault_kv',
'conjur',
'controller',
'galaxy_api_token',
'gce',
'github_token',
'gitlab_token',
'gpg_public_key',
'hashivault_kv',
'hashivault_ssh',
'insights',
'kubernetes_bearer_token',
'net',
'openstack',
'registry',
'rhv',
'satellite6',
'scm',
'ssh',
'thycotic_dsv',
'thycotic_tss',
'vault',
'vmware',
]
)
assert sorted(CredentialType.defaults.keys()) == [
'aim',
'aws',
'azure_kv',
'azure_rm',
'centrify_vault_kv',
'conjur',
'controller',
'galaxy_api_token',
'gce',
'github_token',
'gitlab_token',
'hashivault_kv',
'hashivault_ssh',
'insights',
'kubernetes_bearer_token',
'net',
'openstack',
'registry',
'rhv',
'satellite6',
'scm',
'ssh',
'thycotic_dsv',
'thycotic_tss',
'vault',
'vmware',
]
for type_ in CredentialType.defaults.values():
assert type_().managed is True

Some files were not shown because too many files have changed in this diff Show More