mirror of
https://github.com/ansible/awx.git
synced 2026-04-02 08:45:04 -02:30
Compare commits
110 Commits
feature_mo
...
AAP-57614-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1a205af41f | ||
|
|
96bd35bfb4 | ||
|
|
21e73cb065 | ||
|
|
53be3d16bd | ||
|
|
377dfce197 | ||
|
|
ff68d6196d | ||
|
|
bfefee5aef | ||
|
|
0aaca1bffd | ||
|
|
679e48cbe8 | ||
|
|
c591eb4a7a | ||
|
|
cc2fbf332c | ||
|
|
1646694258 | ||
|
|
643a9849df | ||
|
|
8bd8bcda94 | ||
|
|
63f3c735ea | ||
|
|
7e29f9e3f2 | ||
|
|
c115e0168a | ||
|
|
619d8c67a9 | ||
|
|
0d08a4da60 | ||
|
|
36a1121cd8 | ||
|
|
212546f92b | ||
|
|
fad4881280 | ||
|
|
65b1867114 | ||
|
|
1a3085ff40 | ||
|
|
51ed59c506 | ||
|
|
670dfeed25 | ||
|
|
7384c73c9a | ||
|
|
25b43deec0 | ||
|
|
f74f82e30c | ||
|
|
be5fbf365e | ||
|
|
0995f7c5fe | ||
|
|
3fbc71e6c8 | ||
|
|
143d4cee34 | ||
|
|
af7fbea854 | ||
|
|
57f9eb093a | ||
|
|
8d191046b5 | ||
|
|
7a5f0998d2 | ||
|
|
d1f4fc3e97 | ||
|
|
0f2692b504 | ||
|
|
e1e2c60f2e | ||
|
|
d8a2aa1dc3 | ||
|
|
9d61e42ede | ||
|
|
2c71bcda32 | ||
|
|
a21f9fbdb8 | ||
|
|
2a35ce5524 | ||
|
|
567a980a03 | ||
|
|
9059cfbda6 | ||
|
|
d8fd953732 | ||
|
|
39851c392a | ||
|
|
aeba4a1a3f | ||
|
|
915deca78c | ||
|
|
1a79e853fe | ||
|
|
08f1507f70 | ||
|
|
994a2b3c04 | ||
|
|
7ccc14daeb | ||
|
|
9700fb01f2 | ||
|
|
c515b86fa6 | ||
|
|
01293f1b45 | ||
|
|
fd847862a7 | ||
|
|
980d9db192 | ||
|
|
f2438a0e86 | ||
|
|
707f2fa5da | ||
|
|
1f18396438 | ||
|
|
6f0cfb5ace | ||
|
|
fc0a4cddce | ||
|
|
99511efe81 | ||
|
|
30bf910bd5 | ||
|
|
c9085e4b7f | ||
|
|
5e93f60b9e | ||
|
|
6a031158ce | ||
|
|
749735b941 | ||
|
|
315f9c7eef | ||
|
|
00c0f7e8db | ||
|
|
37ccbc28bd | ||
|
|
63fafec76f | ||
|
|
cba01339a1 | ||
|
|
2622e9d295 | ||
|
|
a6afec6ebb | ||
|
|
f406a377f7 | ||
|
|
adc3e35978 | ||
|
|
838e67005c | ||
|
|
e13fcfe29f | ||
|
|
0f4e91419a | ||
|
|
cca70b242a | ||
|
|
edf459f8ec | ||
|
|
f4286216d6 | ||
|
|
0ab1fea731 | ||
|
|
e3ac581fdf | ||
|
|
5aa3e8cf3b | ||
|
|
8289003c0d | ||
|
|
125083538a | ||
|
|
ed5ab8becd | ||
|
|
fc0087f1b2 | ||
|
|
cfc5ad9d91 | ||
|
|
d929b767b6 | ||
|
|
5f434ac348 | ||
|
|
4de9c8356b | ||
|
|
91118adbd3 | ||
|
|
25f538277a | ||
|
|
82cb52d648 | ||
|
|
f7958b93bd | ||
|
|
3d68ca848e | ||
|
|
99dce79078 | ||
|
|
271383d018 | ||
|
|
1128ad5a57 | ||
|
|
823b736afe | ||
|
|
f80bbc57d8 | ||
|
|
12a7229ee9 | ||
|
|
ceed692354 | ||
|
|
36a00ec46b |
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -24,7 +24,7 @@ in as the first entry for your PR title.
|
||||
|
||||
|
||||
|
||||
##### ADDITIONAL INFORMATION
|
||||
##### STEPS TO REPRODUCE AND EXTRA INFO
|
||||
<!---
|
||||
Include additional information to help people understand the change here.
|
||||
For bugs that don't have a linked bug report, a step-by-step reproduction
|
||||
|
||||
40
.github/workflows/api_schema_check.yml
vendored
40
.github/workflows/api_schema_check.yml
vendored
@@ -45,15 +45,45 @@ jobs:
|
||||
make docker-runner 2>&1 | tee schema-diff.txt
|
||||
exit ${PIPESTATUS[0]}
|
||||
|
||||
- name: Add schema diff to job summary
|
||||
- name: Validate OpenAPI schema
|
||||
id: schema-validation
|
||||
continue-on-error: true
|
||||
run: |
|
||||
AWX_DOCKER_ARGS='-e GITHUB_ACTIONS' \
|
||||
AWX_DOCKER_CMD='make validate-openapi-schema' \
|
||||
make docker-runner 2>&1 | tee schema-validation.txt
|
||||
exit ${PIPESTATUS[0]}
|
||||
|
||||
- name: Add schema validation and diff to job summary
|
||||
if: always()
|
||||
# show text and if for some reason, it can't be generated, state that it can't be.
|
||||
run: |
|
||||
echo "## API Schema Change Detection Results" >> $GITHUB_STEP_SUMMARY
|
||||
echo "## API Schema Check Results" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# Show validation status
|
||||
echo "### OpenAPI Validation" >> $GITHUB_STEP_SUMMARY
|
||||
if [ -f schema-validation.txt ] && grep -q "✓ Schema is valid" schema-validation.txt; then
|
||||
echo "✅ **Status:** PASSED - Schema is valid OpenAPI 3.0.3" >> $GITHUB_STEP_SUMMARY
|
||||
else
|
||||
echo "❌ **Status:** FAILED - Schema validation failed" >> $GITHUB_STEP_SUMMARY
|
||||
if [ -f schema-validation.txt ]; then
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "<details><summary>Validation errors</summary>" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||
cat schema-validation.txt >> $GITHUB_STEP_SUMMARY
|
||||
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||
echo "</details>" >> $GITHUB_STEP_SUMMARY
|
||||
fi
|
||||
fi
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# Show schema changes
|
||||
echo "### Schema Changes" >> $GITHUB_STEP_SUMMARY
|
||||
if [ -f schema-diff.txt ]; then
|
||||
if grep -q "^+" schema-diff.txt || grep -q "^-" schema-diff.txt; then
|
||||
echo "### Schema changes detected" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**Changes detected** between this PR and the base branch" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
# Truncate to first 1000 lines to stay under GitHub's 1MB summary limit
|
||||
TOTAL_LINES=$(wc -l < schema-diff.txt)
|
||||
@@ -65,8 +95,8 @@ jobs:
|
||||
head -n 1000 schema-diff.txt >> $GITHUB_STEP_SUMMARY
|
||||
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||
else
|
||||
echo "### No schema changes detected" >> $GITHUB_STEP_SUMMARY
|
||||
echo "No schema changes detected" >> $GITHUB_STEP_SUMMARY
|
||||
fi
|
||||
else
|
||||
echo "### Unable to generate schema diff" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Unable to generate schema diff" >> $GITHUB_STEP_SUMMARY
|
||||
fi
|
||||
|
||||
96
.github/workflows/ci.yml
vendored
96
.github/workflows/ci.yml
vendored
@@ -4,14 +4,46 @@ env:
|
||||
LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
|
||||
CI_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
DEV_DOCKER_OWNER: ${{ github.repository_owner }}
|
||||
COMPOSE_TAG: ${{ github.base_ref || 'devel' }}
|
||||
COMPOSE_TAG: ${{ github.base_ref || github.ref_name || 'devel' }}
|
||||
UPSTREAM_REPOSITORY_ID: 91594105
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- devel # needed to publish code coverage post-merge
|
||||
schedule:
|
||||
- cron: '0 12,18 * * 1-5'
|
||||
workflow_dispatch: {}
|
||||
jobs:
|
||||
trigger-release-branches:
|
||||
name: "Dispatch CI to release branches"
|
||||
if: github.event_name == 'schedule'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: write
|
||||
steps:
|
||||
- name: Trigger CI on release_4.6
|
||||
id: dispatch_release_46
|
||||
continue-on-error: true
|
||||
run: gh workflow run ci.yml --ref release_4.6
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GH_REPO: ${{ github.repository }}
|
||||
- name: Trigger CI on stable-2.6
|
||||
id: dispatch_stable_26
|
||||
continue-on-error: true
|
||||
run: gh workflow run ci.yml --ref stable-2.6
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GH_REPO: ${{ github.repository }}
|
||||
- name: Check dispatch results
|
||||
if: steps.dispatch_release_46.outcome == 'failure' || steps.dispatch_stable_26.outcome == 'failure'
|
||||
run: |
|
||||
echo "One or more dispatches failed:"
|
||||
echo " release_4.6: ${{ steps.dispatch_release_46.outcome }}"
|
||||
echo " stable-2.6: ${{ steps.dispatch_stable_26.outcome }}"
|
||||
exit 1
|
||||
|
||||
common-tests:
|
||||
name: ${{ matrix.tests.name }}
|
||||
runs-on: ubuntu-latest
|
||||
@@ -62,7 +94,11 @@ jobs:
|
||||
run: |
|
||||
if [ -f "reports/coverage.xml" ]; then
|
||||
sed -i '2i<!-- PR ${{ github.event.pull_request.number }} -->' reports/coverage.xml
|
||||
echo "Injected PR number ${{ github.event.pull_request.number }} into coverage.xml"
|
||||
echo "Injected PR number ${{ github.event.pull_request.number }} into reports/coverage.xml"
|
||||
fi
|
||||
if [ -f "awxkit/coverage.xml" ]; then
|
||||
sed -i '2i<!-- PR ${{ github.event.pull_request.number }} -->' awxkit/coverage.xml
|
||||
echo "Injected PR number ${{ github.event.pull_request.number }} into awxkit/coverage.xml"
|
||||
fi
|
||||
|
||||
- name: Upload test coverage to Codecov
|
||||
@@ -109,28 +145,32 @@ jobs:
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.tests.name }}-artifacts
|
||||
path: reports/coverage.xml
|
||||
path: |
|
||||
reports/coverage.xml
|
||||
awxkit/coverage.xml
|
||||
retention-days: 5
|
||||
|
||||
- name: Upload awx jUnit test reports
|
||||
- name: >-
|
||||
Upload ${{
|
||||
matrix.tests.coverage-upload-name || 'awx'
|
||||
}} jUnit test reports to the unified dashboard
|
||||
if: >-
|
||||
!cancelled()
|
||||
&& steps.make-run.outputs.test-result-files != ''
|
||||
&& github.event_name == 'push'
|
||||
&& env.UPSTREAM_REPOSITORY_ID == github.repository_id
|
||||
&& github.ref_name == github.event.repository.default_branch
|
||||
run: |
|
||||
for junit_file in $(echo '${{ steps.make-run.outputs.test-result-files }}' | sed 's/,/ /')
|
||||
do
|
||||
curl \
|
||||
-v \
|
||||
--user "${{ vars.PDE_ORG_RESULTS_AGGREGATOR_UPLOAD_USER }}:${{ secrets.PDE_ORG_RESULTS_UPLOAD_PASSWORD }}" \
|
||||
--form "xunit_xml=@${junit_file}" \
|
||||
--form "component_name=${{ matrix.tests.coverage-upload-name || 'awx' }}" \
|
||||
--form "git_commit_sha=${{ github.sha }}" \
|
||||
--form "git_repository_url=https://github.com/${{ github.repository }}" \
|
||||
"${{ vars.PDE_ORG_RESULTS_AGGREGATOR_UPLOAD_URL }}/api/results/upload/"
|
||||
done
|
||||
uses: ansible/gh-action-record-test-results@3784db66a1b7fb3809999a7251c8a7203a7ffbe8
|
||||
with:
|
||||
aggregation-server-url: ${{ vars.PDE_ORG_RESULTS_AGGREGATOR_UPLOAD_URL }}
|
||||
http-auth-password: >-
|
||||
${{ secrets.PDE_ORG_RESULTS_UPLOAD_PASSWORD }}
|
||||
http-auth-username: >-
|
||||
${{ vars.PDE_ORG_RESULTS_AGGREGATOR_UPLOAD_USER }}
|
||||
project-component-name: >-
|
||||
${{ matrix.tests.coverage-upload-name || 'awx' }}
|
||||
test-result-files: >-
|
||||
${{ steps.make-run.outputs.test-result-files }}
|
||||
|
||||
dev-env:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -212,7 +252,7 @@ jobs:
|
||||
continue-on-error: true
|
||||
run: |
|
||||
set +e
|
||||
timeout 54m bash -elc '
|
||||
timeout 15m bash -elc '
|
||||
python -m pip install -r molecule/requirements.txt
|
||||
python -m pip install PyYAML # for awx/tools/scripts/rewrite-awx-operator-requirements.py
|
||||
$(realpath ../awx/tools/scripts/rewrite-awx-operator-requirements.py) molecule/requirements.yml $(realpath ../awx)
|
||||
@@ -294,18 +334,16 @@ jobs:
|
||||
&& github.event_name == 'push'
|
||||
&& env.UPSTREAM_REPOSITORY_ID == github.repository_id
|
||||
&& github.ref_name == github.event.repository.default_branch
|
||||
run: |
|
||||
for junit_file in $(echo '${{ steps.make-run.outputs.test-result-files }}' | sed 's/,/ /')
|
||||
do
|
||||
curl \
|
||||
-v \
|
||||
--user "${{ vars.PDE_ORG_RESULTS_AGGREGATOR_UPLOAD_USER }}:${{ secrets.PDE_ORG_RESULTS_UPLOAD_PASSWORD }}" \
|
||||
--form "xunit_xml=@${junit_file}" \
|
||||
--form "component_name=awx" \
|
||||
--form "git_commit_sha=${{ github.sha }}" \
|
||||
--form "git_repository_url=https://github.com/${{ github.repository }}" \
|
||||
"${{ vars.PDE_ORG_RESULTS_AGGREGATOR_UPLOAD_URL }}/api/results/upload/"
|
||||
done
|
||||
uses: ansible/gh-action-record-test-results@3784db66a1b7fb3809999a7251c8a7203a7ffbe8
|
||||
with:
|
||||
aggregation-server-url: ${{ vars.PDE_ORG_RESULTS_AGGREGATOR_UPLOAD_URL }}
|
||||
http-auth-password: >-
|
||||
${{ secrets.PDE_ORG_RESULTS_UPLOAD_PASSWORD }}
|
||||
http-auth-username: >-
|
||||
${{ vars.PDE_ORG_RESULTS_AGGREGATOR_UPLOAD_USER }}
|
||||
project-component-name: awx
|
||||
test-result-files: >-
|
||||
${{ steps.make-run.outputs.test-result-files }}
|
||||
|
||||
collection-integration:
|
||||
name: awx_collection integration
|
||||
|
||||
176
.github/workflows/spec-sync-on-merge.yml
vendored
Normal file
176
.github/workflows/spec-sync-on-merge.yml
vendored
Normal file
@@ -0,0 +1,176 @@
|
||||
# Sync OpenAPI Spec on Merge
|
||||
#
|
||||
# This workflow runs when code is merged to the devel branch.
|
||||
# It runs the dev environment to generate the OpenAPI spec, then syncs it to
|
||||
# the central spec repository.
|
||||
#
|
||||
# FLOW: PR merged → push to branch → dev environment runs → spec synced to central repo
|
||||
#
|
||||
# NOTE: This is an inlined version for testing with private forks.
|
||||
# Production version will use a reusable workflow from the org repos.
|
||||
name: Sync OpenAPI Spec on Merge
|
||||
env:
|
||||
LC_ALL: "C.UTF-8"
|
||||
DEV_DOCKER_OWNER: ${{ github.repository_owner }}
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- devel
|
||||
workflow_dispatch: # Allow manual triggering for testing
|
||||
jobs:
|
||||
sync-openapi-spec:
|
||||
name: Sync OpenAPI spec to central repo
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
packages: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout Controller repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
show-progress: false
|
||||
|
||||
- name: Build awx_devel image to use for schema gen
|
||||
uses: ./.github/actions/awx_devel_image
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
private-github-key: ${{ secrets.PRIVATE_GITHUB_KEY }}
|
||||
|
||||
- name: Generate API Schema
|
||||
run: |
|
||||
DEV_DOCKER_TAG_BASE=ghcr.io/${OWNER_LC} \
|
||||
COMPOSE_TAG=${{ github.base_ref || github.ref_name }} \
|
||||
docker run -u $(id -u) --rm -v ${{ github.workspace }}:/awx_devel/:Z \
|
||||
--workdir=/awx_devel `make print-DEVEL_IMAGE_NAME` /start_tests.sh genschema
|
||||
|
||||
- name: Verify spec file exists
|
||||
run: |
|
||||
SPEC_FILE="./schema.json"
|
||||
if [ ! -f "$SPEC_FILE" ]; then
|
||||
echo "❌ Spec file not found at $SPEC_FILE"
|
||||
echo "Contents of workspace:"
|
||||
ls -la .
|
||||
exit 1
|
||||
fi
|
||||
echo "✅ Found spec file at $SPEC_FILE"
|
||||
|
||||
- name: Checkout spec repo
|
||||
id: checkout_spec_repo
|
||||
continue-on-error: true
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: ansible-automation-platform/aap-openapi-specs
|
||||
ref: ${{ github.ref_name }}
|
||||
path: spec-repo
|
||||
token: ${{ secrets.OPENAPI_SPEC_SYNC_TOKEN }}
|
||||
|
||||
- name: Fail if branch doesn't exist
|
||||
if: steps.checkout_spec_repo.outcome == 'failure'
|
||||
run: |
|
||||
echo "##[error]❌ Branch '${{ github.ref_name }}' does not exist in the central spec repository."
|
||||
echo "##[error]Expected branch: ${{ github.ref_name }}"
|
||||
echo "##[error]This branch must be created in the spec repo before specs can be synced."
|
||||
exit 1
|
||||
|
||||
- name: Compare specs
|
||||
id: compare
|
||||
run: |
|
||||
COMPONENT_SPEC="./schema.json"
|
||||
SPEC_REPO_FILE="spec-repo/controller.json"
|
||||
|
||||
# Check if spec file exists in spec repo
|
||||
if [ ! -f "$SPEC_REPO_FILE" ]; then
|
||||
echo "Spec file doesn't exist in spec repo - will create new file"
|
||||
echo "has_diff=true" >> $GITHUB_OUTPUT
|
||||
echo "is_new_file=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
# Compare files
|
||||
if diff -q "$COMPONENT_SPEC" "$SPEC_REPO_FILE" > /dev/null; then
|
||||
echo "✅ No differences found - specs are identical"
|
||||
echo "has_diff=false" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "📝 Differences found - spec has changed"
|
||||
echo "has_diff=true" >> $GITHUB_OUTPUT
|
||||
echo "is_new_file=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Update spec file
|
||||
if: steps.compare.outputs.has_diff == 'true'
|
||||
run: |
|
||||
cp "./schema.json" "spec-repo/controller.json"
|
||||
echo "✅ Updated spec-repo/controller.json"
|
||||
|
||||
- name: Create PR in spec repo
|
||||
if: steps.compare.outputs.has_diff == 'true'
|
||||
working-directory: spec-repo
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.OPENAPI_SPEC_SYNC_TOKEN }}
|
||||
COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
|
||||
run: |
|
||||
# Configure git
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "github-actions[bot]@users.noreply.github.com"
|
||||
|
||||
# Create branch for PR
|
||||
SHORT_SHA="${{ github.sha }}"
|
||||
SHORT_SHA="${SHORT_SHA:0:7}"
|
||||
BRANCH_NAME="update-Controller-${{ github.ref_name }}-${SHORT_SHA}"
|
||||
git checkout -b "$BRANCH_NAME"
|
||||
|
||||
# Add and commit changes
|
||||
git add "controller.json"
|
||||
|
||||
if [ "${{ steps.compare.outputs.is_new_file }}" == "true" ]; then
|
||||
COMMIT_MSG="Add Controller OpenAPI spec for ${{ github.ref_name }}"
|
||||
else
|
||||
COMMIT_MSG="Update Controller OpenAPI spec for ${{ github.ref_name }}"
|
||||
fi
|
||||
|
||||
git commit -m "$COMMIT_MSG
|
||||
|
||||
Synced from ${{ github.repository }}@${{ github.sha }}
|
||||
Source branch: ${{ github.ref_name }}
|
||||
|
||||
Co-Authored-By: github-actions[bot] <github-actions[bot]@users.noreply.github.com>"
|
||||
|
||||
# Push branch
|
||||
git push origin "$BRANCH_NAME"
|
||||
|
||||
# Create PR
|
||||
PR_TITLE="[${{ github.ref_name }}] Update Controller spec from merged commit"
|
||||
PR_BODY="## Summary
|
||||
Automated OpenAPI spec sync from component repository merge.
|
||||
|
||||
**Source:** ${{ github.repository }}@${{ github.sha }}
|
||||
**Branch:** \`${{ github.ref_name }}\`
|
||||
**Component:** \`Controller\`
|
||||
**Spec File:** \`controller.json\`
|
||||
|
||||
## Changes
|
||||
$(if [ "${{ steps.compare.outputs.is_new_file }}" == "true" ]; then echo "- 🆕 New spec file created"; else echo "- 📝 Spec file updated with latest changes"; fi)
|
||||
|
||||
## Source Commit
|
||||
\`\`\`
|
||||
${COMMIT_MESSAGE}
|
||||
\`\`\`
|
||||
|
||||
---
|
||||
🤖 This PR was automatically generated by the OpenAPI spec sync workflow."
|
||||
|
||||
gh pr create \
|
||||
--title "$PR_TITLE" \
|
||||
--body "$PR_BODY" \
|
||||
--base "${{ github.ref_name }}" \
|
||||
--head "$BRANCH_NAME"
|
||||
|
||||
echo "✅ Created PR in spec repo"
|
||||
|
||||
- name: Report results
|
||||
if: always()
|
||||
run: |
|
||||
if [ "${{ steps.compare.outputs.has_diff }}" == "true" ]; then
|
||||
echo "📝 Spec sync completed - PR created in spec repo"
|
||||
else
|
||||
echo "✅ Spec sync completed - no changes needed"
|
||||
fi
|
||||
10
Makefile
10
Makefile
@@ -1,6 +1,6 @@
|
||||
-include awx/ui/Makefile
|
||||
|
||||
PYTHON := $(notdir $(shell for i in python3.12 python3; do command -v $$i; done|sed 1q))
|
||||
PYTHON := $(notdir $(shell for i in python3.12 python3.11 python3; do command -v $$i; done|sed 1q))
|
||||
SHELL := bash
|
||||
DOCKER_COMPOSE ?= docker compose
|
||||
OFFICIAL ?= no
|
||||
@@ -79,7 +79,7 @@ RECEPTOR_IMAGE ?= quay.io/ansible/receptor:devel
|
||||
SRC_ONLY_PKGS ?= cffi,pycparser,psycopg,twilio
|
||||
# These should be upgraded in the AWX and Ansible venv before attempting
|
||||
# to install the actual requirements
|
||||
VENV_BOOTSTRAP ?= pip==25.3 setuptools==80.9.0 setuptools_scm[toml]==9.2.2 wheel==0.45.1 cython==3.1.3
|
||||
VENV_BOOTSTRAP ?= pip==25.3 setuptools==80.9.0 setuptools_scm[toml]==9.2.2 wheel==0.46.3 cython==3.1.3
|
||||
|
||||
NAME ?= awx
|
||||
|
||||
@@ -289,7 +289,7 @@ dispatcher:
|
||||
@if [ "$(VENV_BASE)" ]; then \
|
||||
. $(VENV_BASE)/awx/bin/activate; \
|
||||
fi; \
|
||||
$(PYTHON) manage.py run_dispatcher
|
||||
$(PYTHON) manage.py dispatcherd
|
||||
|
||||
## Run to start the zeromq callback receiver
|
||||
receiver:
|
||||
@@ -579,6 +579,10 @@ detect-schema-change: genschema
|
||||
# diff exits with 1 when files differ - capture but don't fail
|
||||
-diff -u -b reference-schema.json schema.json
|
||||
|
||||
validate-openapi-schema: genschema
|
||||
@echo "Validating OpenAPI schema from schema.json..."
|
||||
@python3 -c "from openapi_spec_validator import validate; import json; spec = json.load(open('schema.json')); validate(spec); print('✓ OpenAPI Schema is valid!')"
|
||||
|
||||
docker-compose-clean: awx/projects
|
||||
$(DOCKER_COMPOSE) -f tools/docker-compose/_sources/docker-compose.yml rm -sf
|
||||
|
||||
|
||||
@@ -89,7 +89,7 @@ class DeprecatedCredentialField(serializers.IntegerField):
|
||||
def to_internal_value(self, pk):
|
||||
try:
|
||||
pk = int(pk)
|
||||
except ValueError:
|
||||
except (ValueError, TypeError):
|
||||
self.fail('invalid')
|
||||
try:
|
||||
Credential.objects.get(pk=pk)
|
||||
|
||||
@@ -131,8 +131,14 @@ class LoggedLoginView(auth_views.LoginView):
|
||||
|
||||
|
||||
class LoggedLogoutView(auth_views.LogoutView):
|
||||
# Override http_method_names to allow GET requests (Django 5.2+ defaults to POST only)
|
||||
http_method_names = ["get", "post", "options"]
|
||||
success_url_allowed_hosts = set(settings.LOGOUT_ALLOWED_HOSTS.split(",")) if settings.LOGOUT_ALLOWED_HOSTS else set()
|
||||
|
||||
def get(self, request, *args, **kwargs):
|
||||
"""Handle GET requests for logout (for backward compatibility)."""
|
||||
return self.post(request, *args, **kwargs)
|
||||
|
||||
def dispatch(self, request, *args, **kwargs):
|
||||
if is_proxied_request():
|
||||
# 1) We intentionally don't obey ?next= here, just always redirect to platform login
|
||||
|
||||
@@ -111,7 +111,7 @@ class UnifiedJobEventPagination(Pagination):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.use_limit_paginator = False
|
||||
self.limit_pagination = LimitPagination()
|
||||
return super().__init__(*args, **kwargs)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def paginate_queryset(self, queryset, request, view=None):
|
||||
if 'limit' in request.query_params:
|
||||
|
||||
@@ -9,6 +9,50 @@ from drf_spectacular.views import (
|
||||
)
|
||||
|
||||
|
||||
def filter_credential_type_schema(
|
||||
result,
|
||||
generator, # NOSONAR
|
||||
request, # NOSONAR
|
||||
public, # NOSONAR
|
||||
):
|
||||
"""
|
||||
Postprocessing hook to filter CredentialType kind enum values.
|
||||
|
||||
For CredentialTypeRequest and PatchedCredentialTypeRequest schemas (POST/PUT/PATCH),
|
||||
filter the 'kind' enum to only show 'cloud' and 'net' values.
|
||||
|
||||
This ensures the OpenAPI schema accurately reflects that only 'cloud' and 'net'
|
||||
credential types can be created or modified via the API, matching the validation
|
||||
in CredentialTypeSerializer.validate().
|
||||
|
||||
Args:
|
||||
result: The OpenAPI schema dict to be modified
|
||||
generator, request, public: Required by drf-spectacular interface (unused)
|
||||
|
||||
Returns:
|
||||
The modified OpenAPI schema dict
|
||||
"""
|
||||
schemas = result.get('components', {}).get('schemas', {})
|
||||
|
||||
# Filter CredentialTypeRequest (POST/PUT) - field is required
|
||||
if 'CredentialTypeRequest' in schemas:
|
||||
kind_prop = schemas['CredentialTypeRequest'].get('properties', {}).get('kind', {})
|
||||
if 'enum' in kind_prop:
|
||||
# Filter to only cloud and net (no None - field is required)
|
||||
kind_prop['enum'] = ['cloud', 'net']
|
||||
kind_prop['description'] = "* `cloud` - Cloud\\n* `net` - Network"
|
||||
|
||||
# Filter PatchedCredentialTypeRequest (PATCH) - field is optional
|
||||
if 'PatchedCredentialTypeRequest' in schemas:
|
||||
kind_prop = schemas['PatchedCredentialTypeRequest'].get('properties', {}).get('kind', {})
|
||||
if 'enum' in kind_prop:
|
||||
# Filter to only cloud and net (None allowed - field can be omitted in PATCH)
|
||||
kind_prop['enum'] = ['cloud', 'net', None]
|
||||
kind_prop['description'] = "* `cloud` - Cloud\\n* `net` - Network"
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class CustomAutoSchema(AutoSchema):
|
||||
"""Custom AutoSchema to add swagger_topic to tags and handle deprecated endpoints."""
|
||||
|
||||
|
||||
@@ -1230,7 +1230,7 @@ class OrganizationSerializer(BaseSerializer, OpaQueryPathMixin):
|
||||
# to a team. This provides a hint to the ui so it can know to not
|
||||
# display these roles for team role selection.
|
||||
for key in ('admin_role', 'member_role'):
|
||||
if key in summary_dict.get('object_roles', {}):
|
||||
if summary_dict and key in summary_dict.get('object_roles', {}):
|
||||
summary_dict['object_roles'][key]['user_only'] = True
|
||||
|
||||
return summary_dict
|
||||
@@ -2165,13 +2165,13 @@ class BulkHostDeleteSerializer(serializers.Serializer):
|
||||
attrs['hosts_data'] = attrs['host_qs'].values()
|
||||
|
||||
if len(attrs['host_qs']) == 0:
|
||||
error_hosts = {host: "Hosts do not exist or you lack permission to delete it" for host in attrs['hosts']}
|
||||
error_hosts = dict.fromkeys(attrs['hosts'], "Hosts do not exist or you lack permission to delete it")
|
||||
raise serializers.ValidationError({'hosts': error_hosts})
|
||||
|
||||
if len(attrs['host_qs']) < len(attrs['hosts']):
|
||||
hosts_exists = [host['id'] for host in attrs['hosts_data']]
|
||||
failed_hosts = list(set(attrs['hosts']).difference(hosts_exists))
|
||||
error_hosts = {host: "Hosts do not exist or you lack permission to delete it" for host in failed_hosts}
|
||||
error_hosts = dict.fromkeys(failed_hosts, "Hosts do not exist or you lack permission to delete it")
|
||||
raise serializers.ValidationError({'hosts': error_hosts})
|
||||
|
||||
# Getting all inventories that the hosts can be in
|
||||
@@ -3527,7 +3527,7 @@ class JobRelaunchSerializer(BaseSerializer):
|
||||
choices=NEW_JOB_TYPE_CHOICES,
|
||||
write_only=True,
|
||||
)
|
||||
credential_passwords = VerbatimField(required=True, write_only=True)
|
||||
credential_passwords = VerbatimField(required=False, write_only=True)
|
||||
|
||||
class Meta:
|
||||
model = Job
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{% if content_only %}<div class="nocode ansi_fore ansi_back{% if dark %} ansi_dark{% endif %}">{% else %}
|
||||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
<title>{{ title }}</title>
|
||||
|
||||
@@ -52,6 +52,7 @@ from ansi2html import Ansi2HTMLConverter
|
||||
|
||||
from datetime import timezone as dt_timezone
|
||||
from wsgiref.util import FileWrapper
|
||||
from drf_spectacular.utils import extend_schema_view, extend_schema
|
||||
|
||||
# django-ansible-base
|
||||
from ansible_base.lib.utils.requests import get_remote_hosts
|
||||
@@ -378,6 +379,10 @@ class DashboardJobsGraphView(APIView):
|
||||
|
||||
|
||||
class InstanceList(ListCreateAPIView):
|
||||
"""
|
||||
Creates an instance if used on a Kubernetes or OpenShift deployment of Ansible Automation Platform.
|
||||
"""
|
||||
|
||||
name = _("Instances")
|
||||
model = models.Instance
|
||||
serializer_class = serializers.InstanceSerializer
|
||||
@@ -1454,7 +1459,7 @@ class CredentialList(ListCreateAPIView):
|
||||
|
||||
@extend_schema_if_available(
|
||||
extensions={
|
||||
"x-ai-description": "Create a new credential. The `inputs` field contain type-specific input fields. The required fields depend on related `credential_type`. Use GET /v2/credential_types/{id}/ (tool name: controller.credential_types_retrieve) and inspect `inputs` field for the specific credential type's expected schema."
|
||||
"x-ai-description": "Create a new credential. The `inputs` field contain type-specific input fields. The required fields depend on related `credential_type`. Use GET /v2/credential_types/{id}/ (tool name: controller.credential_types_retrieve) and inspect `inputs` field for the specific credential type's expected schema. The fields `user` and `team` are deprecated and should not be included in the payload."
|
||||
}
|
||||
)
|
||||
def post(self, request, *args, **kwargs):
|
||||
@@ -1603,7 +1608,11 @@ class CredentialExternalTest(SubDetailAPIView):
|
||||
obj_permission_type = 'use'
|
||||
resource_purpose = 'test external credential'
|
||||
|
||||
@extend_schema_if_available(extensions={"x-ai-description": "Test update the input values and metadata of an external credential"})
|
||||
@extend_schema_if_available(extensions={"x-ai-description": """Test update the input values and metadata of an external credential.
|
||||
This endpoint supports testing credentials that connect to external secret management systems
|
||||
such as CyberArk AIM, CyberArk Conjur, HashiCorp Vault, AWS Secrets Manager, Azure Key Vault,
|
||||
Centrify Vault, Thycotic DevOps Secrets Vault, and GitHub App Installation Access Token Lookup.
|
||||
It does not support standard credential types such as Machine, SCM, and Cloud."""})
|
||||
def post(self, request, *args, **kwargs):
|
||||
obj = self.get_object()
|
||||
backend_kwargs = {}
|
||||
@@ -1617,13 +1626,16 @@ class CredentialExternalTest(SubDetailAPIView):
|
||||
with set_environ(**settings.AWX_TASK_ENV):
|
||||
obj.credential_type.plugin.backend(**backend_kwargs)
|
||||
return Response({}, status=status.HTTP_202_ACCEPTED)
|
||||
except requests.exceptions.HTTPError as exc:
|
||||
message = 'HTTP {}'.format(exc.response.status_code)
|
||||
return Response({'inputs': message}, status=status.HTTP_400_BAD_REQUEST)
|
||||
except requests.exceptions.HTTPError:
|
||||
message = """Test operation is not supported for credential type {}.
|
||||
This endpoint only supports credentials that connect to
|
||||
external secret management systems such as CyberArk, HashiCorp
|
||||
Vault, or cloud-based secret managers.""".format(obj.credential_type.kind)
|
||||
return Response({'detail': message}, status=status.HTTP_400_BAD_REQUEST)
|
||||
except Exception as exc:
|
||||
message = exc.__class__.__name__
|
||||
args = getattr(exc, 'args', [])
|
||||
for a in args:
|
||||
exc_args = getattr(exc, 'args', [])
|
||||
for a in exc_args:
|
||||
if isinstance(getattr(a, 'reason', None), ConnectTimeoutError):
|
||||
message = str(a.reason)
|
||||
return Response({'inputs': message}, status=status.HTTP_400_BAD_REQUEST)
|
||||
@@ -1681,8 +1693,8 @@ class CredentialTypeExternalTest(SubDetailAPIView):
|
||||
return Response({'inputs': message}, status=status.HTTP_400_BAD_REQUEST)
|
||||
except Exception as exc:
|
||||
message = exc.__class__.__name__
|
||||
args = getattr(exc, 'args', [])
|
||||
for a in args:
|
||||
args_exc = getattr(exc, 'args', [])
|
||||
for a in args_exc:
|
||||
if isinstance(getattr(a, 'reason', None), ConnectTimeoutError):
|
||||
message = str(a.reason)
|
||||
return Response({'inputs': message}, status=status.HTTP_400_BAD_REQUEST)
|
||||
@@ -2469,6 +2481,11 @@ class JobTemplateDetail(RelatedJobsPreventDeleteMixin, RetrieveUpdateDestroyAPIV
|
||||
resource_purpose = 'job template detail'
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
retrieve=extend_schema(
|
||||
extensions={'x-ai-description': 'List job template launch criteria'},
|
||||
)
|
||||
)
|
||||
class JobTemplateLaunch(RetrieveAPIView):
|
||||
model = models.JobTemplate
|
||||
obj_permission_type = 'start'
|
||||
@@ -2477,6 +2494,9 @@ class JobTemplateLaunch(RetrieveAPIView):
|
||||
resource_purpose = 'launch a job from a job template'
|
||||
|
||||
def update_raw_data(self, data):
|
||||
"""
|
||||
Use the ID of a job template to retrieve its launch details.
|
||||
"""
|
||||
try:
|
||||
obj = self.get_object()
|
||||
except PermissionDenied:
|
||||
@@ -3310,6 +3330,11 @@ class WorkflowJobTemplateLabelList(JobTemplateLabelList):
|
||||
resource_purpose = 'labels of a workflow job template'
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
retrieve=extend_schema(
|
||||
extensions={'x-ai-description': 'List workflow job template launch criteria.'},
|
||||
)
|
||||
)
|
||||
class WorkflowJobTemplateLaunch(RetrieveAPIView):
|
||||
model = models.WorkflowJobTemplate
|
||||
obj_permission_type = 'start'
|
||||
@@ -3318,6 +3343,9 @@ class WorkflowJobTemplateLaunch(RetrieveAPIView):
|
||||
resource_purpose = 'launch a workflow job from a workflow job template'
|
||||
|
||||
def update_raw_data(self, data):
|
||||
"""
|
||||
Use the ID of a workflow job template to retrieve its launch details.
|
||||
"""
|
||||
try:
|
||||
obj = self.get_object()
|
||||
except PermissionDenied:
|
||||
@@ -3710,6 +3738,11 @@ class JobCancel(GenericCancelView):
|
||||
return super().post(request, *args, **kwargs)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
retrieve=extend_schema(
|
||||
extensions={'x-ai-description': 'List job relaunch criteria'},
|
||||
)
|
||||
)
|
||||
class JobRelaunch(RetrieveAPIView):
|
||||
model = models.Job
|
||||
obj_permission_type = 'start'
|
||||
@@ -3717,6 +3750,7 @@ class JobRelaunch(RetrieveAPIView):
|
||||
resource_purpose = 'relaunch a job'
|
||||
|
||||
def update_raw_data(self, data):
|
||||
"""Use the ID of a job to retrieve data on retry attempts and necessary passwords."""
|
||||
data = super(JobRelaunch, self).update_raw_data(data)
|
||||
try:
|
||||
obj = self.get_object()
|
||||
|
||||
@@ -25,7 +25,6 @@ import requests
|
||||
|
||||
from ansible_base.lib.utils.schema import extend_schema_if_available
|
||||
|
||||
from awx import MODE
|
||||
from awx.api.generics import APIView
|
||||
from awx.conf.registry import settings_registry
|
||||
from awx.main.analytics import all_collectors
|
||||
@@ -33,7 +32,7 @@ from awx.main.ha import is_ha_environment
|
||||
from awx.main.tasks.system import clear_setting_cache
|
||||
from awx.main.utils import get_awx_version, get_custom_venv_choices
|
||||
from awx.main.utils.licensing import validate_entitlement_manifest
|
||||
from awx.api.versioning import URLPathVersioning, reverse, drf_reverse
|
||||
from awx.api.versioning import URLPathVersioning, reverse
|
||||
from awx.main.constants import PRIVILEGE_ESCALATION_METHODS
|
||||
from awx.main.models import Project, Organization, Instance, InstanceGroup, JobTemplate
|
||||
from awx.main.utils import set_environ
|
||||
@@ -62,8 +61,6 @@ class ApiRootView(APIView):
|
||||
data['custom_logo'] = settings.CUSTOM_LOGO
|
||||
data['custom_login_info'] = settings.CUSTOM_LOGIN_INFO
|
||||
data['login_redirect_override'] = settings.LOGIN_REDIRECT_OVERRIDE
|
||||
if MODE == 'development':
|
||||
data['docs'] = drf_reverse('api:schema-swagger-ui')
|
||||
return Response(data)
|
||||
|
||||
|
||||
|
||||
@@ -133,7 +133,7 @@ class WebhookReceiverBase(APIView):
|
||||
|
||||
@csrf_exempt
|
||||
@extend_schema_if_available(extensions={"x-ai-description": "Receive a webhook event and trigger a job"})
|
||||
def post(self, request, *args, **kwargs):
|
||||
def post(self, request, *args, **kwargs_in):
|
||||
# Ensure that the full contents of the request are captured for multiple uses.
|
||||
request.body
|
||||
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
# Python
|
||||
import logging
|
||||
|
||||
# Dispatcherd
|
||||
from dispatcherd.publish import task
|
||||
|
||||
# AWX
|
||||
from awx.main.analytics.subsystem_metrics import DispatcherMetrics, CallbackReceiverMetrics
|
||||
from awx.main.dispatch.publish import task as task_awx
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
|
||||
logger = logging.getLogger('awx.main.scheduler')
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=300, on_duplicate='discard')
|
||||
@task(queue=get_task_queuename, timeout=300, on_duplicate='discard')
|
||||
def send_subsystem_metrics():
|
||||
DispatcherMetrics().send_metrics()
|
||||
CallbackReceiverMetrics().send_metrics()
|
||||
|
||||
41
awx/main/analytics/dispatcherd_metrics.py
Normal file
41
awx/main/analytics/dispatcherd_metrics.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import http.client
|
||||
import socket
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_dispatcherd_metrics(request):
    """Return the metrics text served by the dispatcherd metrics endpoint.

    The endpoint host and port are read from
    ``settings.METRICS_SUBSYSTEM_CONFIG['server'][settings.METRICS_SERVICE_DISPATCHER]``
    and default to ``localhost:8015``.

    Args:
        request: Optional request object. When it exposes ``query_params``,
            the ``node`` and ``metric`` query parameters are honored:
            a ``node`` filter that does not include this host, or any
            ``metric`` filter at all, yields an empty result.

    Returns:
        str: The decoded response body of ``http://<host>:<port>/metrics``,
        or ``''`` when filtered out, when the body is empty, or when the
        endpoint could not be read.
    """
    metrics_cfg = settings.METRICS_SUBSYSTEM_CONFIG.get('server', {}).get(settings.METRICS_SERVICE_DISPATCHER, {})
    host = metrics_cfg.get('host', 'localhost')
    port = metrics_cfg.get('port', 8015)
    metrics_filter = []
    if request is not None and hasattr(request, "query_params"):
        try:
            nodes_filter = request.query_params.getlist("node")
        except Exception:
            # Best effort: an unreadable filter is treated as "no filter"
            nodes_filter = []
        if nodes_filter and settings.CLUSTER_HOST_ID not in nodes_filter:
            return ''
        try:
            metrics_filter = request.query_params.getlist("metric")
        except Exception:
            metrics_filter = []
    if metrics_filter:
        # Right now we have no way of filtering the dispatcherd metrics
        # so just avoid getting in the way if another metric is filtered for
        return ''
    url = f"http://{host}:{port}/metrics"
    try:
        with urllib.request.urlopen(url, timeout=1.0) as response:
            payload = response.read()
            if not payload:
                return ''
            return payload.decode('utf-8')
    except (
        urllib.error.URLError,
        # read() can fail with a raw connection error (reset, broken pipe)
        # that urllib does not wrap in URLError; collection is best effort,
        # so it must not break the rest of the metrics output
        ConnectionError,
        UnicodeError,
        socket.timeout,
        TimeoutError,
        http.client.HTTPException,
    ) as exc:
        logger.debug("Failed to collect dispatcherd metrics from %s: %s", url, exc)
        return ''
|
||||
@@ -15,6 +15,7 @@ from rest_framework.request import Request
|
||||
from awx.main.consumers import emit_channel_notification
|
||||
from awx.main.utils import is_testing
|
||||
from awx.main.utils.redis import get_redis_client
|
||||
from .dispatcherd_metrics import get_dispatcherd_metrics
|
||||
|
||||
root_key = settings.SUBSYSTEM_METRICS_REDIS_KEY_PREFIX
|
||||
logger = logging.getLogger('awx.main.analytics')
|
||||
@@ -398,11 +399,6 @@ class DispatcherMetrics(Metrics):
|
||||
SetFloatM('workflow_manager_recorded_timestamp', 'Unix timestamp when metrics were last recorded'),
|
||||
SetFloatM('workflow_manager_spawn_workflow_graph_jobs_seconds', 'Time spent spawning workflow tasks'),
|
||||
SetFloatM('workflow_manager_get_tasks_seconds', 'Time spent loading workflow tasks from db'),
|
||||
# dispatcher subsystem metrics
|
||||
SetIntM('dispatcher_pool_scale_up_events', 'Number of times local dispatcher scaled up a worker since startup'),
|
||||
SetIntM('dispatcher_pool_active_task_count', 'Number of active tasks in the worker pool when last task was submitted'),
|
||||
SetIntM('dispatcher_pool_max_worker_count', 'Highest number of workers in worker pool in last collection interval, about 20s'),
|
||||
SetFloatM('dispatcher_availability', 'Fraction of time (in last collection interval) dispatcher was able to receive messages'),
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@@ -430,8 +426,12 @@ class CallbackReceiverMetrics(Metrics):
|
||||
|
||||
def metrics(request):
|
||||
output_text = ''
|
||||
for m in [DispatcherMetrics(), CallbackReceiverMetrics()]:
|
||||
output_text += m.generate_metrics(request)
|
||||
output_text += DispatcherMetrics().generate_metrics(request)
|
||||
output_text += CallbackReceiverMetrics().generate_metrics(request)
|
||||
|
||||
dispatcherd_metrics = get_dispatcherd_metrics(request)
|
||||
if dispatcherd_metrics:
|
||||
output_text += dispatcherd_metrics
|
||||
return output_text
|
||||
|
||||
|
||||
@@ -481,13 +481,6 @@ class CallbackReceiverMetricsServer(MetricsServer):
|
||||
super().__init__(settings.METRICS_SERVICE_CALLBACK_RECEIVER, registry)
|
||||
|
||||
|
||||
class DispatcherMetricsServer(MetricsServer):
|
||||
def __init__(self):
|
||||
registry = CollectorRegistry(auto_describe=True)
|
||||
registry.register(CustomToPrometheusMetricsCollector(DispatcherMetrics(metrics_have_changed=False)))
|
||||
super().__init__(settings.METRICS_SERVICE_DISPATCHER, registry)
|
||||
|
||||
|
||||
class WebsocketsMetricsServer(MetricsServer):
|
||||
def __init__(self):
|
||||
registry = CollectorRegistry(auto_describe=True)
|
||||
|
||||
@@ -82,7 +82,7 @@ class MainConfig(AppConfig):
|
||||
def configure_dispatcherd(self):
|
||||
"""This implements the default configuration for dispatcherd
|
||||
|
||||
If running the tasking service like awx-manage run_dispatcher,
|
||||
If running the tasking service like awx-manage dispatcherd,
|
||||
some additional config will be applied on top of this.
|
||||
This configuration provides the minimum such that code can submit
|
||||
tasks to pg_notify to run those tasks.
|
||||
|
||||
@@ -11,6 +11,7 @@ __all__ = [
|
||||
'CAN_CANCEL',
|
||||
'ACTIVE_STATES',
|
||||
'STANDARD_INVENTORY_UPDATE_ENV',
|
||||
'OIDC_CREDENTIAL_TYPE_NAMESPACES',
|
||||
]
|
||||
|
||||
PRIVILEGE_ESCALATION_METHODS = [
|
||||
@@ -140,3 +141,6 @@ org_role_to_permission = {
|
||||
'execution_environment_admin_role': 'add_executionenvironment',
|
||||
'auditor_role': 'view_project', # TODO: also doesnt really work
|
||||
}
|
||||
|
||||
# OIDC credential type namespaces for feature flag filtering
|
||||
OIDC_CREDENTIAL_TYPE_NAMESPACES = ['hashivault-kv-oidc', 'hashivault-ssh-oidc']
|
||||
|
||||
@@ -77,14 +77,13 @@ class PubSub(object):
|
||||
n = psycopg.connection.Notify(pgn.relname.decode(enc), pgn.extra.decode(enc), pgn.be_pid)
|
||||
yield n
|
||||
|
||||
def events(self, yield_timeouts=False):
|
||||
def events(self):
|
||||
if not self.conn.autocommit:
|
||||
raise RuntimeError('Listening for events can only be done in autocommit mode')
|
||||
|
||||
while True:
|
||||
if select.select([self.conn], [], [], self.select_timeout) == NOT_READY:
|
||||
if yield_timeouts:
|
||||
yield None
|
||||
yield None
|
||||
else:
|
||||
notification_generator = self.current_notifies(self.conn)
|
||||
for notification in notification_generator:
|
||||
|
||||
@@ -2,7 +2,7 @@ from django.conf import settings
|
||||
|
||||
from ansible_base.lib.utils.db import get_pg_notify_params
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
from awx.main.dispatch.pool import get_auto_max_workers
|
||||
from awx.main.utils.common import get_auto_max_workers
|
||||
|
||||
|
||||
def get_dispatcherd_config(for_service: bool = False, mock_publish: bool = False) -> dict:
|
||||
@@ -27,10 +27,14 @@ def get_dispatcherd_config(for_service: bool = False, mock_publish: bool = False
|
||||
"pool_kwargs": {
|
||||
"min_workers": settings.JOB_EVENT_WORKERS,
|
||||
"max_workers": max_workers,
|
||||
# This must be less than max_workers to make sense, which is usually 4
|
||||
# With reserve of 1, after a burst of tasks, load needs to drop down to 4-1=3
|
||||
# before we return to min_workers
|
||||
"scaledown_reserve": 1,
|
||||
},
|
||||
"main_kwargs": {"node_id": settings.CLUSTER_HOST_ID},
|
||||
"process_manager_cls": "ForkServerManager",
|
||||
"process_manager_kwargs": {"preload_modules": ['awx.main.dispatch.hazmat']},
|
||||
"process_manager_kwargs": {"preload_modules": ['awx.main.dispatch.prefork']},
|
||||
},
|
||||
"brokers": {},
|
||||
"publish": {},
|
||||
@@ -38,8 +42,8 @@ def get_dispatcherd_config(for_service: bool = False, mock_publish: bool = False
|
||||
}
|
||||
|
||||
if mock_publish:
|
||||
config["brokers"]["noop"] = {}
|
||||
config["publish"]["default_broker"] = "noop"
|
||||
config["brokers"]["dispatcherd.testing.brokers.noop"] = {}
|
||||
config["publish"]["default_broker"] = "dispatcherd.testing.brokers.noop"
|
||||
else:
|
||||
config["brokers"]["pg_notify"] = {
|
||||
"config": get_pg_notify_params(),
|
||||
@@ -56,5 +60,11 @@ def get_dispatcherd_config(for_service: bool = False, mock_publish: bool = False
|
||||
}
|
||||
|
||||
config["brokers"]["pg_notify"]["channels"] = ['tower_broadcast_all', 'tower_settings_change', get_task_queuename()]
|
||||
metrics_cfg = settings.METRICS_SUBSYSTEM_CONFIG.get('server', {}).get(settings.METRICS_SERVICE_DISPATCHER)
|
||||
if metrics_cfg:
|
||||
config["service"]["metrics_kwargs"] = {
|
||||
"host": metrics_cfg.get("host", "localhost"),
|
||||
"port": metrics_cfg.get("port", 8015),
|
||||
}
|
||||
|
||||
return config
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
import logging
|
||||
import uuid
|
||||
import json
|
||||
|
||||
from django.db import connection
|
||||
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
from awx.main.utils.redis import get_redis_client
|
||||
|
||||
from . import pg_bus_conn
|
||||
|
||||
logger = logging.getLogger('awx.main.dispatch')
|
||||
|
||||
|
||||
class Control(object):
    """Client for sending control commands to an AWX service.

    Commands are JSON messages published with ``notify`` on the queue named
    ``self.queuename``; commands that expect an answer listen on a
    one-off reply queue. Status text is read from redis instead.
    """

    # Service names accepted by the constructor
    services = ('dispatcher', 'callback_receiver')
    result = None

    def __init__(self, service, host=None):
        """Target ``service`` on ``host`` (defaults to get_task_queuename()).

        Raises:
            RuntimeError: if ``service`` is not one of ``self.services``.
        """
        if service not in self.services:
            raise RuntimeError('{} must be in {}'.format(service, self.services))
        self.service = service
        self.queuename = host or get_task_queuename()

    def status(self, *args, **kwargs):
        """Return the statistics text stored in redis for this service.

        For the callback receiver, all ``awx_callback_receiver_statistics_*``
        entries are joined with newlines.
        """
        r = get_redis_client()
        if self.service == 'dispatcher':
            stats = r.get(f'awx_{self.service}_statistics') or b''
            return stats.decode('utf-8')
        else:
            workers = []
            for key in r.keys('awx_callback_receiver_statistics_*'):
                value = r.get(key)
                if value is None:
                    # key disappeared between keys() and get(); nothing to report
                    continue
                workers.append(value.decode('utf-8'))
            return '\n'.join(workers)

    def running(self, *args, **kwargs):
        """Send the 'running' command and return the decoded reply."""
        return self.control_with_reply('running', *args, **kwargs)

    def cancel(self, task_ids, with_reply=True):
        """Send a 'cancel' command for ``task_ids``.

        Returns the decoded reply when ``with_reply`` is true; otherwise the
        message is sent without a reply queue and None is returned.
        """
        if with_reply:
            return self.control_with_reply('cancel', extra_data={'task_ids': task_ids})
        else:
            self.control({'control': 'cancel', 'task_ids': task_ids, 'reply_to': None}, extra_data={'task_ids': task_ids})

    def schedule(self, *args, **kwargs):
        """Send the 'schedule' command and return the decoded reply."""
        return self.control_with_reply('schedule', *args, **kwargs)

    @classmethod
    def generate_reply_queue_name(cls):
        """Return a unique reply queue name of the form ``reply_to_<uuid4>``."""
        return f"reply_to_{str(uuid.uuid4()).replace('-','_')}"

    def control_with_reply(self, command, timeout=5, extra_data=None):
        """Send ``command`` and wait for the first reply on a fresh reply queue.

        Args:
            command: value sent under the 'control' key.
            timeout: passed to pg_bus_conn as ``select_timeout``.
            extra_data: optional dict merged into the outgoing message.

        Returns:
            The JSON-decoded payload of the reply.

        Raises:
            RuntimeError: if the Django connection is not in autocommit mode,
                or if the event stream reports a timeout before any reply.
        """
        logger.warning('checking {} {} for {}'.format(self.service, command, self.queuename))
        reply_queue = Control.generate_reply_queue_name()
        self.result = None

        if not connection.get_autocommit():
            raise RuntimeError('Control-with-reply messages can only be done in autocommit mode')

        with pg_bus_conn(select_timeout=timeout) as conn:
            # listen before notifying so the reply cannot be missed
            conn.listen(reply_queue)
            send_data = {'control': command, 'reply_to': reply_queue}
            if extra_data:
                send_data.update(extra_data)
            conn.notify(self.queuename, json.dumps(send_data))

            for reply in conn.events(yield_timeouts=True):
                # a None event is the timeout marker from the event stream
                if reply is None:
                    logger.error(f'{self.service} did not reply within {timeout}s')
                    raise RuntimeError(f"{self.service} did not reply within {timeout}s")
                break

        return json.loads(reply.payload)

    def control(self, msg, **kwargs):
        """Publish ``msg`` as JSON on this service's queue without waiting for a reply."""
        with pg_bus_conn() as conn:
            conn.notify(self.queuename, json.dumps(msg))
|
||||
@@ -1,146 +0,0 @@
|
||||
import logging
|
||||
import time
|
||||
import yaml
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger('awx.main.dispatch.periodic')
|
||||
|
||||
|
||||
class ScheduledTask:
    """
    A single periodic schedule, loosely modeled after the Job class of the
    python schedule library. Design points:
     - all times are relative to the global start of the owning scheduler
     - target run times use integer math, the current relative time is a float

    Missed schedule policy:
    Target times are invariant, so with interval=10s offset=0 a run at t=7s
    asks for the next run in 3s. Once a complete interval has passed that
    counts as a missed run, and missed runs are abandoned (no catch-up runs).
    """

    def __init__(self, name: str, data: dict):
        # bookkeeping that does not influence the scheduling math
        self.name = name
        self.data = data  # opaque to this class, the caller uses it to know what to run
        self.last_run = None  # relative time of the latest run, debug only
        self.completed_runs = 0  # count of runs that were marked

        # values that drive the schedule computation
        self.interval = int(data['schedule'].total_seconds())
        self.offset = 0  # shift of this schedule relative to the start time
        self.index = 0  # number of elapsed periods of the schedule

    @property
    def next_run(self):
        "Relative time of the next target run, t=0 being the global_start of the scheduler class"
        periods = self.index + 1
        return periods * self.interval + self.offset

    def due_to_run(self, relative_time):
        "True once the next target time has been reached"
        target = self.next_run
        return bool(target <= relative_time)

    def expected_runs(self, relative_time):
        "Number of whole intervals elapsed since this schedule's offset"
        elapsed = relative_time - self.offset
        return int(elapsed / self.interval)

    def mark_run(self, relative_time):
        "Record a run at relative_time and advance index to the current period"
        self.last_run = relative_time
        self.completed_runs += 1
        current_period = self.expected_runs(relative_time)
        skipped = current_period - self.index - 1
        if skipped > 0:
            logger.warning(f'Missed {skipped} schedules of {self.name}')
        self.index = current_period

    def missed_runs(self, relative_time):
        "Number of runs that should have happened but did not, only used for debug"
        outstanding = self.expected_runs(relative_time) - self.completed_runs
        # a run that is due right now is pending rather than missed
        if outstanding and self.due_to_run(relative_time):
            return outstanding - 1
        return outstanding
|
||||
|
||||
|
||||
class Scheduler:
    """Tracks a set of ScheduledTask objects and reports which are due to run.

    All schedule times are kept relative to ``self.global_start``.
    """

    def __init__(self, schedule):
        """
        Expects schedule in the form of a dictionary like
        {
            'job1': {'schedule': timedelta(seconds=50), 'other': 'stuff'}
        }
        Only the schedule nearest-second value is used for scheduling,
        the rest of the data is for use by the caller to know what to run.

        Raises RuntimeError if there are more schedules than seconds in the
        shortest schedule interval.
        """
        self.jobs = [ScheduledTask(name, data) for name, data in schedule.items()]
        min_interval = min(job.interval for job in self.jobs)
        num_jobs = len(self.jobs)

        # this is intentionally opinionated against spammy schedules
        # a core goal is to spread out the scheduled tasks (for worker management)
        # and high-frequency schedules just do not work with that
        if num_jobs > min_interval:
            raise RuntimeError(f'Number of schedules ({num_jobs}) is more than the shortest schedule interval ({min_interval} seconds).')

        # evenly space out jobs over the base interval
        for i, job in enumerate(self.jobs):
            job.offset = (i * min_interval) // num_jobs

        # internally times are all referenced relative to startup time, add grace period
        self.global_start = time.time() + 2.0

    def get_and_mark_pending(self, reftime=None):
        """Return the jobs due at reftime (default: now), marking each as run."""
        if reftime is None:
            reftime = time.time()  # mostly for tests
        relative_time = reftime - self.global_start
        to_run = []
        for job in self.jobs:
            if job.due_to_run(relative_time):
                to_run.append(job)
                # log the lateness before mark_run moves next_run forward
                logger.debug(f'scheduler found {job.name} to run, {relative_time - job.next_run} seconds after target')
                job.mark_run(relative_time)
        return to_run

    def time_until_next_run(self, reftime=None):
        """Return seconds until the earliest next run, clamped to [0.1, 20.0]."""
        if reftime is None:
            reftime = time.time()  # mostly for tests
        relative_time = reftime - self.global_start
        next_job = min(self.jobs, key=lambda j: j.next_run)
        delta = next_job.next_run - relative_time
        if delta <= 0.1:
            # careful not to give 0 or negative values to the select timeout, which has unclear interpretation
            logger.warning(f'Scheduler next run of {next_job.name} is {-delta} seconds in the past')
            return 0.1
        elif delta > 20.0:
            logger.warning(f'Scheduler next run unexpectedly over 20 seconds in future: {delta}')
            return 20.0
        logger.debug(f'Scheduler next run is {next_job.name} in {delta} seconds')
        return delta

    def debug(self, *args, **kwargs):
        """Return a YAML text summary of the scheduler and each schedule."""
        # local import: the timestamps below are labeled UTC, so they must
        # be converted in UTC rather than the process-local timezone
        from datetime import timezone

        data = dict()
        data['title'] = 'Scheduler status'
        reftime = time.time()

        now = datetime.fromtimestamp(reftime, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
        start_time = datetime.fromtimestamp(self.global_start, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
        relative_time = reftime - self.global_start
        data['started_time'] = start_time
        data['current_time'] = now
        data['current_time_relative'] = round(relative_time, 3)
        data['total_schedules'] = len(self.jobs)

        data['schedule_list'] = {
            job.name: dict(
                # None means the schedule has not run yet
                last_run_seconds_ago=round(relative_time - job.last_run, 3) if job.last_run is not None else None,
                next_run_in_seconds=round(job.next_run - relative_time, 3),
                offset_in_seconds=job.offset,
                completed_runs=job.completed_runs,
                missed_runs=job.missed_runs(relative_time),
            )
            for job in sorted(self.jobs, key=lambda job: job.interval)
        }
        return yaml.safe_dump(data, default_flow_style=False, sort_keys=False)
|
||||
@@ -1,583 +1,54 @@
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from datetime import datetime, timezone
|
||||
from uuid import uuid4
|
||||
import json
|
||||
|
||||
import collections
|
||||
from multiprocessing import Process
|
||||
from multiprocessing import Queue as MPQueue
|
||||
from queue import Full as QueueFull, Empty as QueueEmpty
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import connection as django_connection, connections
|
||||
from django.db import connection as django_connection
|
||||
from django.core.cache import cache as django_cache
|
||||
from django.utils.timezone import now as tz_now
|
||||
from django_guid import set_guid
|
||||
from jinja2 import Template
|
||||
import psutil
|
||||
|
||||
from ansible_base.lib.logging.runtime import log_excess_runtime
|
||||
|
||||
from awx.main.models import UnifiedJob
|
||||
from awx.main.dispatch import reaper
|
||||
from awx.main.utils.common import get_mem_effective_capacity, get_corrected_memory, get_corrected_cpu, get_cpu_effective_capacity
|
||||
|
||||
# ansible-runner
|
||||
from ansible_runner.utils.capacity import get_mem_in_bytes, get_cpu_count
|
||||
|
||||
if 'run_callback_receiver' in sys.argv:
|
||||
logger = logging.getLogger('awx.main.commands.run_callback_receiver')
|
||||
else:
|
||||
logger = logging.getLogger('awx.main.dispatch')
|
||||
|
||||
|
||||
RETIRED_SENTINEL_TASK = "[retired]"
|
||||
|
||||
|
||||
class NoOpResultQueue(object):
|
||||
def put(self, item):
|
||||
pass
|
||||
logger = logging.getLogger('awx.main.commands.run_callback_receiver')
|
||||
|
||||
|
||||
class PoolWorker(object):
|
||||
"""
|
||||
Used to track a worker child process and its pending and finished messages.
|
||||
A simple wrapper around a multiprocessing.Process that tracks a worker child process.
|
||||
|
||||
This class makes use of two distinct multiprocessing.Queues to track state:
|
||||
|
||||
- self.queue: this is a queue which represents pending messages that should
|
||||
be handled by this worker process; as new AMQP messages come
|
||||
in, a pool will put() them into this queue; the child
|
||||
process that is forked will get() from this queue and handle
|
||||
received messages in an endless loop
|
||||
- self.finished: this is a queue which the worker process uses to signal
|
||||
that it has finished processing a message
|
||||
|
||||
When a message is put() onto this worker, it is tracked in
|
||||
self.managed_tasks.
|
||||
|
||||
Periodically, the worker will call .calculate_managed_tasks(), which will
|
||||
cause messages in self.finished to be removed from self.managed_tasks.
|
||||
|
||||
In this way, self.managed_tasks represents a view of the messages assigned
|
||||
to a specific process. The message at [0] is the least-recently inserted
|
||||
message, and it represents what the worker is running _right now_
|
||||
(self.current_task).
|
||||
|
||||
A worker is "busy" when it has at least one message in self.managed_tasks.
|
||||
It is "idle" when self.managed_tasks is empty.
|
||||
The worker process runs the provided target function.
|
||||
"""
|
||||
|
||||
track_managed_tasks = False
|
||||
|
||||
def __init__(self, queue_size, target, args, **kwargs):
|
||||
self.messages_sent = 0
|
||||
self.messages_finished = 0
|
||||
self.managed_tasks = collections.OrderedDict()
|
||||
self.finished = MPQueue(queue_size) if self.track_managed_tasks else NoOpResultQueue()
|
||||
self.queue = MPQueue(queue_size)
|
||||
self.process = Process(target=target, args=(self.queue, self.finished) + args)
|
||||
def __init__(self, target, args):
|
||||
self.process = Process(target=target, args=args)
|
||||
self.process.daemon = True
|
||||
self.creation_time = time.monotonic()
|
||||
self.retiring = False
|
||||
|
||||
def start(self):
|
||||
self.process.start()
|
||||
|
||||
def put(self, body):
|
||||
if self.retiring:
|
||||
uuid = body.get('uuid', 'N/A') if isinstance(body, dict) else 'N/A'
|
||||
logger.info(f"Worker pid:{self.pid} is retiring. Refusing new task {uuid}.")
|
||||
raise QueueFull("Worker is retiring and not accepting new tasks") # AutoscalePool.write handles QueueFull
|
||||
uuid = '?'
|
||||
if isinstance(body, dict):
|
||||
if not body.get('uuid'):
|
||||
body['uuid'] = str(uuid4())
|
||||
uuid = body['uuid']
|
||||
if self.track_managed_tasks:
|
||||
self.managed_tasks[uuid] = body
|
||||
self.queue.put(body, block=True, timeout=5)
|
||||
self.messages_sent += 1
|
||||
self.calculate_managed_tasks()
|
||||
|
||||
def quit(self):
|
||||
"""
|
||||
Send a special control message to the worker that tells it to exit
|
||||
gracefully.
|
||||
"""
|
||||
self.queue.put('QUIT')
|
||||
|
||||
@property
|
||||
def age(self):
|
||||
"""Returns the current age of the worker in seconds."""
|
||||
return time.monotonic() - self.creation_time
|
||||
|
||||
@property
|
||||
def pid(self):
|
||||
return self.process.pid
|
||||
|
||||
@property
|
||||
def qsize(self):
|
||||
return self.queue.qsize()
|
||||
|
||||
@property
|
||||
def alive(self):
|
||||
return self.process.is_alive()
|
||||
|
||||
@property
|
||||
def mb(self):
|
||||
if self.alive:
|
||||
return '{:0.3f}'.format(psutil.Process(self.pid).memory_info().rss / 1024.0 / 1024.0)
|
||||
return '0'
|
||||
|
||||
@property
|
||||
def exitcode(self):
|
||||
return str(self.process.exitcode)
|
||||
|
||||
def calculate_managed_tasks(self):
|
||||
if not self.track_managed_tasks:
|
||||
return
|
||||
# look to see if any tasks were finished
|
||||
finished = []
|
||||
for _ in range(self.finished.qsize()):
|
||||
try:
|
||||
finished.append(self.finished.get(block=False))
|
||||
except QueueEmpty:
|
||||
break # qsize is not always _totally_ up to date
|
||||
|
||||
# if any tasks were finished, removed them from the managed tasks for
|
||||
# this worker
|
||||
for uuid in finished:
|
||||
try:
|
||||
del self.managed_tasks[uuid]
|
||||
self.messages_finished += 1
|
||||
except KeyError:
|
||||
# ansible _sometimes_ appears to send events w/ duplicate UUIDs;
|
||||
# UUIDs for ansible events are *not* actually globally unique
|
||||
# when this occurs, it's _fine_ to ignore this KeyError because
|
||||
# the purpose of self.managed_tasks is to just track internal
|
||||
# state of which events are *currently* being processed.
|
||||
logger.warning('Event UUID {} appears to be have been duplicated.'.format(uuid))
|
||||
if self.retiring:
|
||||
self.managed_tasks[RETIRED_SENTINEL_TASK] = {'task': RETIRED_SENTINEL_TASK}
|
||||
|
||||
@property
|
||||
def current_task(self):
|
||||
if not self.track_managed_tasks:
|
||||
return None
|
||||
self.calculate_managed_tasks()
|
||||
# the task at [0] is the one that's running right now (or is about to
|
||||
# be running)
|
||||
if len(self.managed_tasks):
|
||||
return self.managed_tasks[list(self.managed_tasks.keys())[0]]
|
||||
|
||||
return None
|
||||
|
||||
@property
|
||||
def orphaned_tasks(self):
|
||||
if not self.track_managed_tasks:
|
||||
return []
|
||||
orphaned = []
|
||||
if not self.alive:
|
||||
# if this process had a running task that never finished,
|
||||
# requeue its error callbacks
|
||||
current_task = self.current_task
|
||||
if isinstance(current_task, dict):
|
||||
orphaned.extend(current_task.get('errbacks', []))
|
||||
|
||||
# if this process has any pending messages requeue them
|
||||
for _ in range(self.qsize):
|
||||
try:
|
||||
message = self.queue.get(block=False)
|
||||
if message != 'QUIT':
|
||||
orphaned.append(message)
|
||||
except QueueEmpty:
|
||||
break # qsize is not always _totally_ up to date
|
||||
if len(orphaned):
|
||||
logger.error('requeuing {} messages from gone worker pid:{}'.format(len(orphaned), self.pid))
|
||||
return orphaned
|
||||
|
||||
@property
|
||||
def busy(self):
|
||||
self.calculate_managed_tasks()
|
||||
return len(self.managed_tasks) > 0
|
||||
|
||||
@property
|
||||
def idle(self):
|
||||
return not self.busy
|
||||
|
||||
|
||||
class StatefulPoolWorker(PoolWorker):
|
||||
track_managed_tasks = True
|
||||
|
||||
|
||||
class WorkerPool(object):
|
||||
"""
|
||||
Creates a pool of forked PoolWorkers.
|
||||
|
||||
As WorkerPool.write(...) is called (generally, by a kombu consumer
|
||||
implementation when it receives an AMQP message), messages are passed to
|
||||
one of the multiprocessing Queues where some work can be done on them.
|
||||
Each worker process runs the provided target function in an isolated process.
|
||||
The pool manages spawning, tracking, and stopping worker processes.
|
||||
|
||||
class MessagePrinter(awx.main.dispatch.worker.BaseWorker):
|
||||
|
||||
def perform_work(self, body):
|
||||
print(body)
|
||||
|
||||
pool = WorkerPool(min_workers=4) # spawn four worker processes
|
||||
pool.init_workers(MessagePrint().work_loop)
|
||||
pool.write(
|
||||
0, # preferred worker 0
|
||||
'Hello, World!'
|
||||
)
|
||||
Example:
|
||||
pool = WorkerPool(workers_num=4) # spawn four worker processes
|
||||
"""
|
||||
|
||||
pool_cls = PoolWorker
|
||||
debug_meta = ''
|
||||
def __init__(self, workers_num=None):
|
||||
self.workers_num = workers_num or settings.JOB_EVENT_WORKERS
|
||||
|
||||
def __init__(self, min_workers=None, queue_size=None):
|
||||
self.name = settings.CLUSTER_HOST_ID
|
||||
self.pid = os.getpid()
|
||||
self.min_workers = min_workers or settings.JOB_EVENT_WORKERS
|
||||
self.queue_size = queue_size or settings.JOB_EVENT_MAX_QUEUE_SIZE
|
||||
self.workers = []
|
||||
|
||||
def __len__(self):
|
||||
return len(self.workers)
|
||||
|
||||
def init_workers(self, target, *target_args):
|
||||
self.target = target
|
||||
self.target_args = target_args
|
||||
for idx in range(self.min_workers):
|
||||
self.up()
|
||||
|
||||
def up(self):
|
||||
idx = len(self.workers)
|
||||
# It's important to close these because we're _about_ to fork, and we
|
||||
# don't want the forked processes to inherit the open sockets
|
||||
# for the DB and cache connections (that way lies race conditions)
|
||||
django_connection.close()
|
||||
django_cache.close()
|
||||
worker = self.pool_cls(self.queue_size, self.target, (idx,) + self.target_args)
|
||||
self.workers.append(worker)
|
||||
try:
|
||||
worker.start()
|
||||
except Exception:
|
||||
logger.exception('could not fork')
|
||||
else:
|
||||
logger.debug('scaling up worker pid:{}'.format(worker.pid))
|
||||
return idx, worker
|
||||
|
||||
def debug(self, *args, **kwargs):
|
||||
tmpl = Template(
|
||||
'Recorded at: {{ dt }} \n'
|
||||
'{{ pool.name }}[pid:{{ pool.pid }}] workers total={{ workers|length }} {{ meta }} \n'
|
||||
'{% for w in workers %}'
|
||||
'. worker[pid:{{ w.pid }}]{% if not w.alive %} GONE exit={{ w.exitcode }}{% endif %}'
|
||||
' sent={{ w.messages_sent }}'
|
||||
' age={{ "%.0f"|format(w.age) }}s'
|
||||
' retiring={{ w.retiring }}'
|
||||
'{% if w.messages_finished %} finished={{ w.messages_finished }}{% endif %}'
|
||||
' qsize={{ w.managed_tasks|length }}'
|
||||
' rss={{ w.mb }}MB'
|
||||
'{% for task in w.managed_tasks.values() %}'
|
||||
'\n - {% if loop.index0 == 0 %}running {% if "age" in task %}for: {{ "%.1f" % task["age"] }}s {% endif %}{% else %}queued {% endif %}'
|
||||
'{{ task["uuid"] }} '
|
||||
'{% if "task" in task %}'
|
||||
'{{ task["task"].rsplit(".", 1)[-1] }}'
|
||||
# don't print kwargs, they often contain launch-time secrets
|
||||
'(*{{ task.get("args", []) }})'
|
||||
'{% endif %}'
|
||||
'{% endfor %}'
|
||||
'{% if not w.managed_tasks|length %}'
|
||||
' [IDLE]'
|
||||
'{% endif %}'
|
||||
'\n'
|
||||
'{% endfor %}'
|
||||
)
|
||||
now = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
|
||||
return tmpl.render(pool=self, workers=self.workers, meta=self.debug_meta, dt=now)
|
||||
|
||||
def write(self, preferred_queue, body):
|
||||
queue_order = sorted(range(len(self.workers)), key=lambda x: -1 if x == preferred_queue else x)
|
||||
write_attempt_order = []
|
||||
for queue_actual in queue_order:
|
||||
def init_workers(self, target):
|
||||
for idx in range(self.workers_num):
|
||||
# It's important to close these because we're _about_ to fork, and we
|
||||
# don't want the forked processes to inherit the open sockets
|
||||
# for the DB and cache connections (that way lies race conditions)
|
||||
django_connection.close()
|
||||
django_cache.close()
|
||||
worker = PoolWorker(target, (idx,))
|
||||
try:
|
||||
self.workers[queue_actual].put(body)
|
||||
return queue_actual
|
||||
except QueueFull:
|
||||
pass
|
||||
worker.start()
|
||||
except Exception:
|
||||
tb = traceback.format_exc()
|
||||
logger.warning("could not write to queue %s" % preferred_queue)
|
||||
logger.warning("detail: {}".format(tb))
|
||||
write_attempt_order.append(preferred_queue)
|
||||
logger.error("could not write payload to any queue, attempted order: {}".format(write_attempt_order))
|
||||
return None
|
||||
|
||||
def stop(self, signum):
    """
    Send the signal ``signum`` to every worker process in the pool.

    Delivery is best-effort per worker: if signalling one worker fails (for
    example because the process is already gone) the error is logged and the
    remaining workers are still signalled.
    """
    for worker in self.workers:
        # guard each worker separately so a single failure does not stop
        # the signal from reaching the rest of the pool
        try:
            os.kill(worker.pid, signum)
        except Exception:
            logger.exception('could not kill {}'.format(worker.pid))
|
||||
|
||||
|
||||
def get_auto_max_workers():
    """Compute the default ``max_workers`` value for this node.

    The calculation follows almost the same logic as
    Instance.local_health_check. What matters is that the result is MORE
    than Instance.capacity, so that the task-manager does not over-schedule
    this node.

    Reading the capacity from the database and adding reserve workers would
    be the ideal approach, but OCP task containers register themselves after
    startup, which poses a bootstrap problem.
    """
    # Memory constraint: memory reported through ansible-runner, possibly
    # replaced by a user override, turned into the same number as max forks.
    # get_mem_effective_capacity takes the memory as bytes.
    memory_bytes = get_corrected_memory(get_mem_in_bytes())
    forks_by_memory = get_mem_effective_capacity(memory_bytes, is_control_node=True)

    # CPU constraint, derived through the same steps
    cpus = get_corrected_cpu(get_cpu_count())
    forks_by_cpu = get_cpu_effective_capacity(cpus, is_control_node=True)

    # This is where the calculation departs from the health check: the
    # larger of the two constraints is used, plus a magic number of extra
    # workers so that a few remain available to run the heartbeat.
    heartbeat_reserve = 7
    return max(forks_by_memory, forks_by_cpu) + heartbeat_reserve
|
||||
|
||||
|
||||
class AutoscalePool(WorkerPool):
|
||||
"""
|
||||
An extended pool implementation that automatically scales workers up and
|
||||
down based on demand
|
||||
"""
|
||||
|
||||
pool_cls = StatefulPoolWorker
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """
    Set up the autoscaling limits and the counters used for metrics.

    Keyword arguments consumed here (everything else is passed on to the
    parent constructor):

    max_workers -- upper bound on pool size; when omitted or None the value
        from get_auto_max_workers() is used. It is never allowed to be
        lower than ``min_workers``.
    max_worker_lifetime_seconds -- worker age limit; defaults to
        settings.WORKER_MAX_LIFETIME_SECONDS, or 14400 (4 hours) when that
        setting is absent.
    """
    self.max_workers = kwargs.pop('max_workers', None)
    default_lifetime = getattr(settings, 'WORKER_MAX_LIFETIME_SECONDS', 14400)  # 4 hours
    self.max_worker_lifetime_seconds = kwargs.pop('max_worker_lifetime_seconds', default_lifetime)
    super(AutoscalePool, self).__init__(*args, **kwargs)

    ceiling = self.max_workers
    if ceiling is None:
        ceiling = get_auto_max_workers()
    # the ceiling may not drop below min_workers
    self.max_workers = max(self.min_workers, ceiling)

    # The task manager enforces settings.TASK_MANAGER_TIMEOUT by itself;
    # a task that is still running after the additional grace period is
    # forced to stop by the pool.
    base_timeout = settings.TASK_MANAGER_TIMEOUT
    self.task_manager_timeout = base_timeout + settings.TASK_MANAGER_TIMEOUT_GRACE_PERIOD

    # Counters for the periodic subsystem metrics. This class does not save
    # them to redis directly; they are reported via produce_subsystem_metrics.
    self.scale_up_ct = 0
    self.worker_count_max = 0

    # when the running-task list was last logged, to limit log spam
    self.last_task_list_log = time.monotonic()
|
||||
|
||||
def produce_subsystem_metrics(self, metrics_object):
    """
    Report the pool counters by calling ``metrics_object.set(name, value)``.

    Three values are reported: the number of scale-up events, the number of
    tasks currently managed across all workers, and the recorded maximum
    worker count. Afterwards the recorded maximum is reset to the current
    number of workers.
    """
    record = metrics_object.set
    record('dispatcher_pool_scale_up_events', self.scale_up_ct)

    active_tasks = 0
    for pool_worker in self.workers:
        active_tasks += len(pool_worker.managed_tasks)
    record('dispatcher_pool_active_task_count', active_tasks)

    record('dispatcher_pool_max_worker_count', self.worker_count_max)
    # start the next reporting window from the current pool size
    self.worker_count_max = len(self.workers)
|
||||
|
||||
@property
def should_grow(self):
    """
    Whether another worker should be added to the pool.

    True when the pool holds fewer than ``min_workers`` workers, or when
    every worker currently in the pool reports itself as busy.
    """
    below_minimum = len(self.workers) < self.min_workers
    if not below_minimum:
        # enough workers exist; grow only if none of them is free
        return all([pool_worker.busy for pool_worker in self.workers])
    return True
|
||||
|
||||
@property
def full(self):
    """Whether the number of workers in the pool equals ``max_workers``."""
    current_size = len(self.workers)
    return current_size == self.max_workers
|
||||
|
||||
@property
def debug_meta(self):
    """Short text describing the configured worker bounds, for the debug report."""
    bounds = (self.min_workers, self.max_workers)
    return 'min={} max={}'.format(*bounds)
|
||||
|
||||
@log_excess_runtime(logger, debug_cutoff=0.05, cutoff=0.2)
def cleanup(self):
    """
    Perform some internal accounting and cleanup. This is run on
    every cluster node heartbeat:

    1. Discover worker processes that exited, and recover messages they
       were handling.
    2. Clean up unnecessary, idle workers, and retire workers that are
       older than ``max_worker_lifetime_seconds``.
    3. Send SIGUSR1 to a live worker whose task/dependency/workflow manager
       task has been running longer than ``task_manager_timeout``.

    IMPORTANT: this function is one of the few places in the dispatcher
    (aside from setting lookups) where we talk to the database. As such,
    if there's an outage, this method _can_ throw various
    django.db.utils.Error exceptions. Act accordingly.
    """
    # tasks recovered from dead workers, re-dispatched at the end
    orphaned = []
    # iterate over a copy because workers are removed from the list below
    for w in self.workers[::]:
        is_retirement_age = self.max_worker_lifetime_seconds is not None and w.age > self.max_worker_lifetime_seconds
        if not w.alive:
            # the worker process has exited
            # 1. take the task it was running and enqueue the error
            #    callbacks
            # 2. take any pending tasks delivered to its queue and
            #    send them to another worker
            logger.error('worker pid:{} is gone (exit={})'.format(w.pid, w.exitcode))
            if w.current_task:
                if w.current_task == {'task': RETIRED_SENTINEL_TASK}:
                    # the worker exited because it was retired for age;
                    # nothing to reap or re-dispatch
                    logger.debug('scaling down worker pid:{} due to worker age: {}'.format(w.pid, w.age))
                    self.workers.remove(w)
                    continue
                if w.current_task != 'QUIT':
                    try:
                        # mark the jobs tied to the interrupted task as failed
                        for j in UnifiedJob.objects.filter(celery_task_id=w.current_task['uuid']):
                            reaper.reap_job(j, 'failed')
                    except Exception:
                        logger.exception('failed to reap job UUID {}'.format(w.current_task['uuid']))
                else:
                    logger.warning(f'Worker was told to quit but has not, pid={w.pid}')
            orphaned.extend(w.orphaned_tasks)
            self.workers.remove(w)

        elif w.idle and len(self.workers) > self.min_workers:
            # the process has an empty queue (it's idle) and we have
            # more processes in the pool than we need (> min)
            # send this process a message so it will exit gracefully
            # at the next opportunity
            logger.debug('scaling down worker pid:{}'.format(w.pid))
            w.quit()
            self.workers.remove(w)

        elif w.idle and is_retirement_age:
            # idle and too old: retire it right away
            logger.debug('scaling down worker pid:{} due to worker age: {}'.format(w.pid, w.age))
            w.quit()
            self.workers.remove(w)

        elif is_retirement_age and not w.retiring and not w.idle:
            logger.info(
                f"Worker pid:{w.pid} (age: {w.age:.0f}s) exceeded max lifetime ({self.max_worker_lifetime_seconds:.0f}s). "
                "Signaling for graceful retirement."
            )
            # Send QUIT signal; worker will finish current task then exit.
            w.quit()
            # mark as retiring to reject any future tasks that might be assigned in meantime
            w.retiring = True

        if w.alive:
            # if we discover a task manager invocation that's been running
            # too long, reap it (because otherwise it'll just hold the postgres
            # advisory lock forever); the goal of this code is to discover
            # deadlocks or other serious issues in the task manager that cause
            # the task manager to never do more work
            current_task = w.current_task
            if current_task and isinstance(current_task, dict):
                endings = ('tasks.task_manager', 'tasks.dependency_manager', 'tasks.workflow_manager')
                current_task_name = current_task.get('task', '')
                if current_task_name.endswith(endings):
                    if 'started' not in current_task:
                        w.managed_tasks[current_task['uuid']]['started'] = time.time()
                    # NOTE(review): reading current_task['started'] right after
                    # writing it through w.managed_tasks presumably relies on both
                    # being the same dict object - confirm
                    age = time.time() - current_task['started']
                    w.managed_tasks[current_task['uuid']]['age'] = age
                    if age > self.task_manager_timeout:
                        logger.error(f'{current_task_name} has held the advisory lock for {age}, sending SIGUSR1 to {w.pid}')
                        os.kill(w.pid, signal.SIGUSR1)

    # hand every recovered task to a randomly chosen worker
    for m in orphaned:
        # if all the workers are dead, spawn at least one
        if not len(self.workers):
            self.up()
        idx = random.choice(range(len(self.workers)))
        self.write(idx, m)
|
||||
|
||||
def add_bind_kwargs(self, body):
    """
    Fill in the keyword arguments a task asked the dispatcher to bind.

    The ``bind_kwargs`` entry is removed from ``body`` and ``body['kwargs']``
    is created if missing. For each recognized name, a value is added to
    ``body['kwargs']``:

    dispatch_time -- current time as an ISO-format string
    worker_tasks -- mapping of worker pid to the list of task ids that
        worker currently manages

    ``body`` is modified in place; nothing is returned.
    """
    requested = body.pop('bind_kwargs', [])
    task_kwargs = body.setdefault('kwargs', {})

    if 'dispatch_time' in requested:
        task_kwargs['dispatch_time'] = tz_now().isoformat()

    if 'worker_tasks' not in requested:
        return

    tasks_by_pid = {}
    for pool_worker in self.workers:
        # refresh the worker's bookkeeping before reading it
        pool_worker.calculate_managed_tasks()
        tasks_by_pid[pool_worker.pid] = list(pool_worker.managed_tasks.keys())
    task_kwargs['worker_tasks'] = tasks_by_pid
|
||||
|
||||
def up(self):
    """
    Add a worker unless the pool is full.

    When the pool is full no worker is created; a randomly chosen existing
    worker is returned instead so the message lands in its backlog.
    Otherwise the scale-up counter is incremented, the parent
    implementation creates the worker, and the recorded maximum worker
    count is raised if the pool is now larger than it.

    Returns an ``(index, worker)`` tuple.
    """
    if not self.full:
        self.scale_up_ct += 1
        result = super(AutoscalePool, self).up()
        pool_size = len(self.workers)
        if pool_size > self.worker_count_max:
            self.worker_count_max = pool_size
        return result

    # no room to spawn: pick a random worker's backlog
    chosen = random.choice(range(len(self.workers)))
    return chosen, self.workers[chosen]
|
||||
|
||||
@staticmethod
|
||||
def fast_task_serialization(current_task):
|
||||
try:
|
||||
return str(current_task.get('task')) + ' - ' + str(sorted(current_task.get('args', []))) + ' - ' + str(sorted(current_task.get('kwargs', {})))
|
||||
except Exception:
|
||||
# just make sure this does not make things worse
|
||||
return str(current_task)
|
||||
|
||||
def write(self, preferred_queue, body):
|
||||
if 'guid' in body:
|
||||
set_guid(body['guid'])
|
||||
try:
|
||||
if isinstance(body, dict) and body.get('bind_kwargs'):
|
||||
self.add_bind_kwargs(body)
|
||||
if self.should_grow:
|
||||
self.up()
|
||||
# we don't care about "preferred queue" round robin distribution, just
|
||||
# find the first non-busy worker and claim it
|
||||
workers = self.workers[:]
|
||||
random.shuffle(workers)
|
||||
for w in workers:
|
||||
if not w.busy:
|
||||
w.put(body)
|
||||
break
|
||||
logger.exception('could not fork')
|
||||
else:
|
||||
task_name = 'unknown'
|
||||
if isinstance(body, dict):
|
||||
task_name = body.get('task')
|
||||
logger.warning(f'Workers maxed, queuing {task_name}, load: {sum(len(w.managed_tasks) for w in self.workers)} / {len(self.workers)}')
|
||||
# Once every 10 seconds write out task list for debugging
|
||||
if time.monotonic() - self.last_task_list_log >= 10.0:
|
||||
task_counts = {}
|
||||
for worker in self.workers:
|
||||
task_slug = self.fast_task_serialization(worker.current_task)
|
||||
task_counts.setdefault(task_slug, 0)
|
||||
task_counts[task_slug] += 1
|
||||
logger.info(f'Running tasks by count:\n{json.dumps(task_counts, indent=2)}')
|
||||
self.last_task_list_log = time.monotonic()
|
||||
return super(AutoscalePool, self).write(preferred_queue, body)
|
||||
except Exception:
|
||||
for conn in connections.all():
|
||||
# If the database connection has a hiccup, re-establish a new
|
||||
# connection
|
||||
conn.close_if_unusable_or_obsolete()
|
||||
logger.exception('failed to write inbound message')
|
||||
logger.debug('scaling up worker pid:{}'.format(worker.process.pid))
|
||||
|
||||
@@ -18,7 +18,7 @@ django.setup() # noqa
|
||||
from django.conf import settings
|
||||
|
||||
# Preload all periodic tasks so their imports will be in shared memory
|
||||
for name, options in settings.CELERYBEAT_SCHEDULE.items():
|
||||
for name, options in settings.DISPATCHER_SCHEDULE.items():
|
||||
resolve_callable(options['task'])
|
||||
|
||||
|
||||
@@ -1,163 +0,0 @@
|
||||
import inspect
|
||||
import logging
|
||||
import json
|
||||
import time
|
||||
from uuid import uuid4
|
||||
|
||||
from dispatcherd.publish import submit_task
|
||||
from dispatcherd.processors.blocker import Blocker
|
||||
from dispatcherd.utils import resolve_callable
|
||||
|
||||
from django_guid import get_guid
|
||||
from django.conf import settings
|
||||
|
||||
from . import pg_bus_conn
|
||||
|
||||
logger = logging.getLogger('awx.main.dispatch')
|
||||
|
||||
|
||||
def serialize_task(f):
    """Return the dotted path of ``f``: its module name and its own name joined by a dot."""
    module_name, object_name = f.__module__, f.__name__
    return '.'.join((module_name, object_name))
|
||||
|
||||
|
||||
class task:
    """
    Used to decorate a function or class so that it can be run asynchronously
    via the task dispatcher. Tasks can be simple functions:

    @task()
    def add(a, b):
        return a + b

    ...or classes that define a `run` method:

    @task()
    class Adder:
        def run(self, a, b):
            return a + b

    # Tasks can be run synchronously...
    assert add(1, 1) == 2
    assert Adder().run(1, 1) == 2

    # ...or published to a queue:
    add.apply_async([1, 1])
    Adder.apply_async([1, 1])

    # Tasks can also define a specific target queue or use the special fan-out queue tower_broadcast:

    @task(queue='slow-tasks')
    def snooze():
        time.sleep(10)

    @task(queue='tower_broadcast')
    def announce():
        print("Run this everywhere!")

    # The special parameter bind_kwargs tells the main dispatcher process to add certain kwargs

    @task(bind_kwargs=['dispatch_time'])
    def print_time(dispatch_time=None):
        print(f"Time I was dispatched: {dispatch_time}")
    """

    def __init__(self, queue=None, bind_kwargs=None, timeout=None, on_duplicate=None):
        # Store the decorator options; they are applied when the decorator
        # is called with the function or class being wrapped.
        self.queue = queue
        self.bind_kwargs = bind_kwargs
        self.timeout = timeout
        self.on_duplicate = on_duplicate

    def __call__(self, fn=None):
        """
        Decorate ``fn``.

        For a class, a new class is returned that combines the class's
        namespace and bases with the publishing mixin defined below. For a
        function, the same function object is returned with ``name``,
        ``apply_async``, ``delay`` and ``get_async_body`` attached.
        """
        # copy the options into locals so the nested classmethods below can
        # refer to them through the closure
        queue = self.queue
        bind_kwargs = self.bind_kwargs
        timeout = self.timeout
        on_duplicate = self.on_duplicate

        class PublisherMixin(object):
            queue = None

            @classmethod
            def delay(cls, *args, **kwargs):
                # convenience form: positional/keyword call style instead of
                # passing the args list and kwargs dict explicitly
                return cls.apply_async(args, kwargs)

            @classmethod
            def get_async_body(cls, args=None, kwargs=None, uuid=None, **kw):
                """
                Get the python dict to become JSON data in the pg_notify message
                This same message gets passed over the dispatcher IPC queue to workers
                If a task is submitted to a multiprocessing pool, skipping pg_notify, this might be used directly
                """
                # a fresh uuid4 string is generated when no uuid is supplied
                task_id = uuid or str(uuid4())
                args = args or []
                kwargs = kwargs or {}
                obj = {'uuid': task_id, 'args': args, 'kwargs': kwargs, 'task': cls.name, 'time_pub': time.time()}
                guid = get_guid()
                if guid:
                    obj['guid'] = guid
                if bind_kwargs:
                    obj['bind_kwargs'] = bind_kwargs
                # extra keyword arguments are merged last and can override the keys above
                obj.update(**kw)
                return obj

            @classmethod
            def apply_async(cls, args=None, kwargs=None, queue=None, uuid=None, **kw):
                """
                Submit the task for asynchronous execution.

                When the FEATURE_DISPATCHERD_ENABLED flag is enabled the task is
                handed to dispatcherd's submit_task and its result is returned.
                Otherwise, or if anything in that path raises, the message is
                published with pg_notify and ``(message_dict, queue)`` is returned.

                Raises ValueError when no queue can be determined for the
                pg_notify path.
                """
                try:
                    from flags.state import flag_enabled

                    if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
                        # At this point we have the import string, and submit_task wants the method, so back to that
                        actual_task = resolve_callable(cls.name)
                        processor_options = ()
                        if on_duplicate is not None:
                            processor_options = (Blocker.Params(on_duplicate=on_duplicate),)
                        return submit_task(
                            actual_task,
                            args=args,
                            kwargs=kwargs,
                            queue=queue,
                            uuid=uuid,
                            timeout=timeout,
                            processor_options=processor_options,
                            **kw,
                        )
                except Exception:
                    # this also covers an exception raised by submit_task itself
                    logger.exception(f"[DISPATCHER] Failed to check for alternative dispatcherd implementation for {cls.name}")
                    # Continue with original implementation if anything fails
                    pass

                # Original implementation follows
                # NOTE(review): 'im_func' looks like the Python 2 unbound-method
                # attribute; on Python 3 this presumably always falls back to
                # cls.queue - confirm
                queue = queue or getattr(cls.queue, 'im_func', cls.queue)
                if not queue:
                    msg = f'{cls.name}: Queue value required and may not be None'
                    logger.error(msg)
                    raise ValueError(msg)
                obj = cls.get_async_body(args=args, kwargs=kwargs, uuid=uuid, **kw)
                # a queue may be given as a callable that returns the queue name
                if callable(queue):
                    queue = queue()
                if not settings.DISPATCHER_MOCK_PUBLISH:
                    with pg_bus_conn() as conn:
                        conn.notify(queue, json.dumps(obj))
                return (obj, queue)

        # If the object we're wrapping *is* a class (e.g., RunJob), return
        # a *new* class that inherits from the wrapped class *and* BaseTask
        # In this way, the new class returned by our decorator is the class
        # being decorated *plus* PublisherMixin so cls.apply_async() and
        # cls.delay() work
        bases = []
        ns = {'name': serialize_task(fn), 'queue': queue}
        if inspect.isclass(fn):
            bases = list(fn.__bases__)
            ns.update(fn.__dict__)
        cls = type(fn.__name__, tuple(bases + [PublisherMixin]), ns)
        if inspect.isclass(fn):
            return cls

        # if the object being decorated is *not* a class (it's a Python
        # function), make fn.apply_async and fn.delay proxy through to the
        # PublisherMixin we dynamically created above
        setattr(fn, 'name', cls.name)
        setattr(fn, 'apply_async', cls.apply_async)
        setattr(fn, 'delay', cls.delay)
        setattr(fn, 'get_async_body', cls.get_async_body)
        return fn
|
||||
@@ -1,9 +1,6 @@
|
||||
from datetime import timedelta
|
||||
import logging
|
||||
|
||||
from django.db.models import Q
|
||||
from django.conf import settings
|
||||
from django.utils.timezone import now as tz_now
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
|
||||
from awx.main.models import Instance, UnifiedJob, WorkflowJob
|
||||
@@ -50,26 +47,6 @@ def reap_job(j, status, job_explanation=None):
|
||||
logger.error(f'{j.log_format} is no longer {status_before}; reaping')
|
||||
|
||||
|
||||
def reap_waiting(instance=None, status='failed', job_explanation=None, grace_period=None, excluded_uuids=None, ref_time=None):
    """
    Reap jobs that are stuck in the ``waiting`` status on an instance.

    A job is reaped when its controller node is the instance's hostname and
    it was last modified at least ``grace_period`` seconds before
    ``ref_time``.

    instance -- instance whose jobs are examined; defaults to the local hostname
    status -- status passed on to reap_job
    job_explanation -- explanation passed on to reap_job
    grace_period -- seconds; defaults to settings.JOB_WAITING_GRACE_PERIOD
        plus settings.TASK_MANAGER_TIMEOUT
    excluded_uuids -- task ids whose jobs are left alone
    ref_time -- reference time; defaults to the current time
    """
    if grace_period is None:
        grace_period = settings.JOB_WAITING_GRACE_PERIOD + settings.TASK_MANAGER_TIMEOUT

    hostname = Instance.objects.my_hostname() if instance is None else instance.hostname

    if ref_time is None:
        ref_time = tz_now()
    cutoff = ref_time - timedelta(seconds=grace_period)

    stale_jobs = UnifiedJob.objects.filter(status='waiting', modified__lte=cutoff, controller_node=hostname)
    if excluded_uuids:
        stale_jobs = stale_jobs.exclude(celery_task_id__in=excluded_uuids)

    for stale_job in stale_jobs:
        reap_job(stale_job, status, job_explanation=job_explanation)
|
||||
|
||||
|
||||
def reap(instance=None, status='failed', job_explanation=None, excluded_uuids=None, ref_time=None):
|
||||
"""
|
||||
Reap all jobs in running for this instance.
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
from .base import AWXConsumerRedis, AWXConsumerPG, BaseWorker # noqa
|
||||
from .base import AWXConsumerRedis # noqa
|
||||
from .callback import CallbackBrokerWorker # noqa
|
||||
from .task import TaskWorker # noqa
|
||||
|
||||
@@ -4,342 +4,39 @@
|
||||
import os
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
import redis
|
||||
import json
|
||||
import psycopg
|
||||
import time
|
||||
from uuid import UUID
|
||||
from queue import Empty as QueueEmpty
|
||||
from datetime import timedelta
|
||||
|
||||
from django import db
|
||||
from django.conf import settings
|
||||
import redis.exceptions
|
||||
|
||||
from ansible_base.lib.logging.runtime import log_excess_runtime
|
||||
|
||||
from awx.main.utils.redis import get_redis_client
|
||||
from awx.main.dispatch.pool import WorkerPool
|
||||
from awx.main.dispatch.periodic import Scheduler
|
||||
from awx.main.dispatch import pg_bus_conn
|
||||
from awx.main.utils.db import set_connection_name
|
||||
import awx.main.analytics.subsystem_metrics as s_metrics
|
||||
|
||||
if 'run_callback_receiver' in sys.argv:
|
||||
logger = logging.getLogger('awx.main.commands.run_callback_receiver')
|
||||
else:
|
||||
logger = logging.getLogger('awx.main.dispatch')
|
||||
logger = logging.getLogger('awx.main.commands.run_callback_receiver')
|
||||
|
||||
|
||||
def signame(sig):
    """
    Return the name (e.g. ``'SIGTERM'``) of the signal number ``sig``.

    Names are taken from the ``signal`` module's ``SIG*`` attributes, leaving
    out the ``SIG_*`` ones. Raises KeyError for an unknown number.
    """
    names_by_number = {
        number: label
        for label, number in signal.__dict__.items()
        if label.startswith('SIG') and not label.startswith('SIG_')
    }
    return names_by_number[sig]
|
||||
|
||||
|
||||
class WorkerSignalHandler:
    """
    Signal setup for a worker process.

    SIGTERM gets the default disposition back, while SIGINT only sets the
    ``kill_now`` flag so the caller can finish up and leave its loop.
    """

    def __init__(self):
        # flag polled by the worker loop
        self.kill_now = False
        signal.signal(signal.SIGTERM, signal.SIG_DFL)
        signal.signal(signal.SIGINT, self.exit_gracefully)

    def exit_gracefully(self, *args, **kwargs):
        """Signal handler: request a graceful exit. Arguments are ignored."""
        self.kill_now = True
|
||||
|
||||
|
||||
class AWXConsumerBase(object):
|
||||
last_stats = time.time()
|
||||
|
||||
def __init__(self, name, worker, queues=[], pool=None):
|
||||
self.should_stop = False
|
||||
class AWXConsumerRedis(object):
|
||||
|
||||
def __init__(self, name, worker):
|
||||
self.name = name
|
||||
self.total_messages = 0
|
||||
self.queues = queues
|
||||
self.worker = worker
|
||||
self.pool = pool
|
||||
if pool is None:
|
||||
self.pool = WorkerPool()
|
||||
self.pool.init_workers(self.worker.work_loop)
|
||||
self.pool = WorkerPool()
|
||||
self.pool.init_workers(worker.work_loop)
|
||||
self.redis = get_redis_client()
|
||||
|
||||
@property
def listening_on(self):
    """One-line description of the queues this consumer listens on."""
    description = f'listening on {self.queues}'
    return description
|
||||
|
||||
def control(self, body):
    """
    Handle a control message sent to the dispatcher.

    ``body['control']`` selects the action:

    status -- reply with the listening description and the pool debug report
    schedule -- reply with the scheduler debug report
    running -- reply with the ids of all tasks managed by the workers
    cancel -- send SIGTERM to every worker whose current task id is in
        ``body['task_ids']`` and reply with the ids that were signalled
    reload -- ask every worker to quit; no reply is sent

    For the first four actions ``body['reply_to']`` must be present; when it
    is not None the JSON-encoded reply is published to that queue. Any other
    action is logged as an error.
    """
    logger.warning(f'Received control signal:\n{body}')
    control = body.get('control')
    if control in ('status', 'schedule', 'running', 'cancel'):
        reply_queue = body['reply_to']
        if control == 'status':
            msg = '\n'.join([self.listening_on, self.pool.debug()])
        elif control == 'schedule':
            msg = self.scheduler.debug()
        elif control == 'running':
            msg = []
            for worker in self.pool.workers:
                # refresh the worker's bookkeeping before reading it
                worker.calculate_managed_tasks()
                msg.extend(worker.managed_tasks.keys())
        elif control == 'cancel':
            msg = []
            task_ids = set(body['task_ids'])
            for worker in self.pool.workers:
                task = worker.current_task
                if task and task['uuid'] in task_ids:
                    # logger.warn is a deprecated alias; use logger.warning
                    logger.warning(f'Sending SIGTERM to task id={task["uuid"]}, task={task.get("task")}, args={task.get("args")}')
                    os.kill(worker.pid, signal.SIGTERM)
                    msg.append(task['uuid'])
            if task_ids and not msg:
                logger.info(f'Could not locate running tasks to cancel with ids={task_ids}')

        if reply_queue is not None:
            with pg_bus_conn() as conn:
                conn.notify(reply_queue, json.dumps(msg))
    elif control == 'reload':
        for worker in self.pool.workers:
            worker.quit()
    else:
        logger.error('unrecognized control message: {}'.format(control))
|
||||
|
||||
def dispatch_task(self, body):
    """
    Place ``body`` into a worker queue so the task it describes gets run.

    A dict body is stamped with ``time_ack``. The preferred queue index is
    derived from the task's uuid when it has a usable one, otherwise from
    the running message count; with an empty pool the index is 0.
    """
    if isinstance(body, dict):
        body['time_ack'] = time.time()

    pool_size = len(self.pool)
    if not pool_size:
        queue = 0
    else:
        queue = None
        if "uuid" in body and body['uuid']:
            try:
                queue = UUID(body['uuid']).int % pool_size
            except Exception:
                # unusable uuid; fall through to the message counter
                queue = None
        if queue is None:
            queue = self.total_messages % pool_size

    self.pool.write(queue, body)
    self.total_messages += 1
|
||||
|
||||
def process_task(self, body):
    """
    Route ``body`` either to the control handler or to a worker queue.

    A body containing the key ``control`` goes to ``self.control`` and that
    call's result is returned; an exception raised there is logged and None
    is returned. Any other body is dispatched as a task.
    """
    if 'control' not in body:
        self.dispatch_task(body)
        return
    try:
        return self.control(body)
    except Exception:
        logger.exception(f"Exception handling control message: {body}")
        return
|
||||
|
||||
@log_excess_runtime(logger, debug_cutoff=0.05, cutoff=0.2)
def record_statistics(self):
    """
    Save the pool debug report to redis, at most once per second.

    The report is stored under the key ``awx_<name>_statistics``. Redis
    errors are logged and not raised; the time of the attempt is recorded
    either way.
    """
    elapsed = time.time() - self.last_stats
    if not (elapsed > 1):
        # buffer stat recording to once per second
        return

    save_data = self.pool.debug()
    try:
        self.redis.set(f'awx_{self.name}_statistics', save_data)
    except redis.exceptions.ConnectionError as exc:
        logger.warning(f'Redis connection error saving {self.name} status data:\n{exc}\nmissed data:\n{save_data}')
    except Exception:
        logger.exception(f"Unknown redis error saving {self.name} status data:\nmissed data:\n{save_data}")
    self.last_stats = time.time()
|
||||
|
||||
def run(self, *args, **kwargs):
|
||||
def run(self):
|
||||
signal.signal(signal.SIGINT, self.stop)
|
||||
signal.signal(signal.SIGTERM, self.stop)
|
||||
|
||||
# Child should implement other things here
|
||||
|
||||
def stop(self, signum, frame):
    """
    Signal handler that shuts the consumer down.

    Sets ``should_stop``, logs the name of the received signal, calls the
    worker's ``on_stop`` hook and then raises SystemExit.
    """
    self.should_stop = True
    received = signame(signum)
    logger.warning('received {}, stopping'.format(received))
    self.worker.on_stop()
    raise SystemExit()
|
||||
|
||||
|
||||
class AWXConsumerRedis(AWXConsumerBase):
    """Consumer whose main process only starts the worker and then idles."""

    def run(self, *args, **kwargs):
        """
        Run the parent setup, call the worker's ``on_start`` hook, close the
        database connection, and then sleep forever in 60 second steps.
        """
        super().run(*args, **kwargs)
        self.worker.on_start()
        logger.info(f'Callback receiver started with pid={os.getpid()}')
        # logs use database, so close connection
        db.connection.close()

        while True:
            time.sleep(60)
|
||||
|
||||
|
||||
class AWXConsumerPG(AWXConsumerBase):
    """
    Consumer that receives messages through Postgres listen/notify and hands
    them to the worker pool, while also running periodic tasks on a schedule.
    """

    def __init__(self, *args, schedule=None, **kwargs):
        """
        schedule -- optional dict of periodic entries; it is copied, and the
            pool cleanup and metrics gathering control entries are added to it.
        All other arguments are passed on to the parent constructor.
        """
        super().__init__(*args, **kwargs)
        # NOTE(review): the first setting name looks like a misspelled legacy
        # name kept as an override of DISPATCHER_DB_DOWNTIME_TOLERANCE - confirm
        self.pg_max_wait = getattr(settings, 'DISPATCHER_DB_DOWNTOWN_TOLLERANCE', settings.DISPATCHER_DB_DOWNTIME_TOLERANCE)
        # if no successful loops have ran since startup, then we should fail right away
        self.pg_is_down = True  # set so that we fail if we get database errors on startup
        init_time = time.time()
        self.pg_down_time = init_time - self.pg_max_wait  # allow no grace period
        self.last_cleanup = init_time
        self.subsystem_metrics = s_metrics.DispatcherMetrics(auto_pipe_execute=False)
        self.last_metrics_gather = init_time
        # seconds spent waiting on the listen connection since the last metrics gather
        self.listen_cumulative_time = 0.0
        if schedule:
            # copy so the caller's dict is not modified by the additions below
            schedule = schedule.copy()
        else:
            schedule = {}
        # add control tasks to be ran at regular schedules
        # NOTE: if we run out of database connections, it is important to still run cleanup
        # so that we scale down workers and free up connections
        schedule['pool_cleanup'] = {'control': self.pool.cleanup, 'schedule': timedelta(seconds=60)}
        # record subsystem metrics for the dispatcher
        schedule['metrics_gather'] = {'control': self.record_metrics, 'schedule': timedelta(seconds=20)}
        self.scheduler = Scheduler(schedule)

    @log_excess_runtime(logger, debug_cutoff=0.05, cutoff=0.2)
    def record_metrics(self):
        """
        Gather the pool metrics and the listener availability ratio and send
        them to redis. A redis connection error is logged, not raised. The
        listening-time accumulator is reset afterwards.
        """
        current_time = time.time()
        self.pool.produce_subsystem_metrics(self.subsystem_metrics)
        # fraction of the time since the last gather that was spent listening
        self.subsystem_metrics.set('dispatcher_availability', self.listen_cumulative_time / (current_time - self.last_metrics_gather))
        try:
            self.subsystem_metrics.pipe_execute()
        except redis.exceptions.ConnectionError as exc:
            logger.warning(f'Redis connection error saving dispatcher metrics, error:\n{exc}')
        self.listen_cumulative_time = 0.0
        self.last_metrics_gather = current_time

    def run_periodic_tasks(self):
        """
        Run general periodic logic, and return maximum time in seconds before
        the next requested run
        This may be called more often than that when events are consumed
        so this should be very efficient in that
        """
        try:
            self.record_statistics()  # maintains time buffer in method
        except Exception as exc:
            logger.warning(f'Failed to save dispatcher statistics {exc}')

        # Everything benchmarks to the same original time, so that skews due to
        # runtime of the actions, themselves, do not mess up scheduling expectations
        reftime = time.time()

        for job in self.scheduler.get_and_mark_pending(reftime=reftime):
            if 'control' in job.data:
                # control entries are callables run in this process
                try:
                    job.data['control']()
                except Exception:
                    logger.exception(f'Error running control task {job.data}')
            elif 'task' in job.data:
                body = self.worker.resolve_callable(job.data['task']).get_async_body()
                # bypasses pg_notify for scheduled tasks
                self.dispatch_task(body)

        # reaching this point counts as a successful loop, so clear the down flag
        if self.pg_is_down:
            logger.info('Dispatcher listener connection established')
            self.pg_is_down = False

        # start of the next listening interval, read back in run()
        self.listen_start = time.time()

        return self.scheduler.time_until_next_run(reftime=reftime)

    def run(self, *args, **kwargs):
        """
        Main loop: listen on the configured queues, process each received
        message and run periodic tasks in between.

        Returns when ``should_stop`` is set. A stale connection is
        re-established immediately. Database errors are retried about once
        per second until the outage has lasted longer than ``pg_max_wait``
        seconds; then, and on any other exception, all workers are asked to
        quit and the exception is re-raised.
        """
        super(AWXConsumerPG, self).run(*args, **kwargs)

        logger.info(f"Running {self.name}, workers min={self.pool.min_workers} max={self.pool.max_workers}, listening to queues {self.queues}")
        # whether the worker's on_start hook has been called yet
        init = False

        while True:
            try:
                with pg_bus_conn(new_connection=True) as conn:
                    for queue in self.queues:
                        conn.listen(queue)
                    if init is False:
                        self.worker.on_start()
                        init = True
                    # run_periodic_tasks run scheduled actions and gives time until next scheduled action
                    # this is saved to the conn (PubSub) object in order to modify read timeout in-loop
                    conn.select_timeout = self.run_periodic_tasks()
                    # this is the main operational loop for awx-manage run_dispatcher
                    for e in conn.events(yield_timeouts=True):
                        self.listen_cumulative_time += time.time() - self.listen_start  # for metrics
                        # a None event is a timeout; only periodic work is due
                        if e is not None:
                            self.process_task(json.loads(e.payload))
                        conn.select_timeout = self.run_periodic_tasks()
                        if self.should_stop:
                            return
            except psycopg.InterfaceError:
                logger.warning("Stale Postgres message bus connection, reconnecting")
                continue
            except (db.DatabaseError, psycopg.OperationalError):
                # If we have attained steady state operation, tolerate short-term database hiccups
                if not self.pg_is_down:
                    logger.exception(f"Error consuming new events from postgres, will retry for {self.pg_max_wait} s")
                    self.pg_down_time = time.time()
                    self.pg_is_down = True
                current_downtime = time.time() - self.pg_down_time
                if current_downtime > self.pg_max_wait:
                    logger.exception(f"Postgres event consumer has not recovered in {current_downtime} s, exiting")
                    # Sending QUIT to multiprocess queue to signal workers to exit
                    for worker in self.pool.workers:
                        try:
                            worker.quit()
                        except Exception:
                            logger.exception(f"Error sending QUIT to worker {worker}")
                    raise
                # Wait for a second before next attempt, but still listen for any shutdown signals
                for i in range(10):
                    if self.should_stop:
                        return
                    time.sleep(0.1)
                for conn in db.connections.all():
                    conn.close_if_unusable_or_obsolete()
            except Exception:
                # Log unanticipated exception in addition to writing to stderr to get timestamps and other metadata
                logger.exception('Encountered unhandled error in dispatcher main loop')
                # Sending QUIT to multiprocess queue to signal workers to exit
                for worker in self.pool.workers:
                    try:
                        worker.quit()
                    except Exception:
                        logger.exception(f"Error sending QUIT to worker {worker}")
                raise
|
||||
|
||||
|
||||
class BaseWorker(object):
    """
    Base class for workers that consume messages from a queue.

    `work_loop` repeatedly pulls a message with `read` and hands it to
    `perform_work`, which subclasses must implement.
    """

    def read(self, queue):
        """Return the next message from `queue`, blocking for at most one second."""
        return queue.get(block=True, timeout=1)

    def work_loop(self, queue, finished, idx, *args):
        """
        Consume messages until a stop condition is met.

        The loop ends when the signal handler sets `kill_now`, when the parent
        PID changes, or when the message 'QUIT' is read.

        :param queue: source of messages, passed through to `read`
        :param finished: queue that receives the 'uuid' of every processed message
        :param idx: worker index, only used in log output
        :param args: extra positional arguments forwarded to `perform_work`
        """
        parent_pid = os.getppid()
        handler = WorkerSignalHandler()
        set_connection_name('worker')  # set application_name to distinguish from other dispatcher processes

        # A changed parent PID means this process has been orphaned
        # (e.g., the parent segfaulted or was SIGKILLed), so we exit as well.
        while not handler.kill_now and os.getppid() == parent_pid:
            try:
                body = self.read(queue)
                if body == 'QUIT':
                    break
            except QueueEmpty:
                # nothing arrived within the read timeout; re-check the stop conditions
                continue
            except Exception:
                logger.exception("Exception on worker {}, reconnecting: ".format(idx))
                continue

            try:
                # If a database connection had a hiccup during the prior message,
                # close it so a new connection can be established.
                for db_conn in db.connections.all():
                    db_conn.close_if_unusable_or_obsolete()
                self.perform_work(body, *args)
            except Exception:
                logger.exception(f'Unhandled exception in perform_work in worker pid={os.getpid()}')
            finally:
                # Report the message as finished whether or not the work succeeded.
                if 'uuid' in body:
                    finished.put(body['uuid'])

        logger.debug('worker exiting gracefully pid:{}'.format(os.getpid()))

    def perform_work(self, body):
        """Process a single message; must be provided by subclasses."""
        raise NotImplementedError()

    def on_start(self):
        """Hook with no default behavior."""
        pass

    def on_stop(self):
        """Hook with no default behavior."""
        pass
|
||||
def stop(self, signum, frame):
|
||||
logger.warning('received {}, stopping'.format(signame(signum)))
|
||||
raise SystemExit()
|
||||
|
||||
@@ -4,10 +4,12 @@ import os
|
||||
import signal
|
||||
import time
|
||||
import datetime
|
||||
from queue import Empty as QueueEmpty
|
||||
|
||||
from django.conf import settings
|
||||
from django.utils.functional import cached_property
|
||||
from django.utils.timezone import now as tz_now
|
||||
from django import db
|
||||
from django.db import transaction, connection as django_connection
|
||||
from django_guid import set_guid
|
||||
|
||||
@@ -16,6 +18,7 @@ import psutil
|
||||
import redis
|
||||
|
||||
from awx.main.utils.redis import get_redis_client
|
||||
from awx.main.utils.db import set_connection_name
|
||||
from awx.main.consumers import emit_channel_notification
|
||||
from awx.main.models import JobEvent, AdHocCommandEvent, ProjectUpdateEvent, InventoryUpdateEvent, SystemJobEvent, UnifiedJob
|
||||
from awx.main.constants import ACTIVE_STATES
|
||||
@@ -23,7 +26,6 @@ from awx.main.models.events import emit_event_detail
|
||||
from awx.main.utils.profiling import AWXProfiler
|
||||
from awx.main.tasks.system import events_processed_hook
|
||||
import awx.main.analytics.subsystem_metrics as s_metrics
|
||||
from .base import BaseWorker
|
||||
|
||||
logger = logging.getLogger('awx.main.commands.run_callback_receiver')
|
||||
|
||||
@@ -54,7 +56,17 @@ def job_stats_wrapup(job_identifier, event=None):
|
||||
logger.exception('Worker failed to save stats or emit notifications: Job {}'.format(job_identifier))
|
||||
|
||||
|
||||
class CallbackBrokerWorker(BaseWorker):
|
||||
class WorkerSignalHandler:
    """
    Installs the signal handling for a worker process.

    SIGTERM is reset to the default action, while SIGINT only sets the
    `kill_now` flag so that the caller can finish its current iteration.
    """

    def __init__(self):
        self.kill_now = False
        dispositions = (
            (signal.SIGTERM, signal.SIG_DFL),
            (signal.SIGINT, self.exit_gracefully),
        )
        for signum, action in dispositions:
            signal.signal(signum, action)

    def exit_gracefully(self, *args, **kwargs):
        """Signal handler: request that the owner stop; arguments are ignored."""
        self.kill_now = True
|
||||
|
||||
|
||||
class CallbackBrokerWorker:
|
||||
"""
|
||||
A worker implementation that deserializes callback event data and persists
|
||||
it into the database.
|
||||
@@ -65,13 +77,13 @@ class CallbackBrokerWorker(BaseWorker):
|
||||
|
||||
MAX_RETRIES = 2
|
||||
INDIVIDUAL_EVENT_RETRIES = 3
|
||||
last_stats = time.time()
|
||||
last_flush = time.time()
|
||||
total = 0
|
||||
last_event = ''
|
||||
prof = None
|
||||
|
||||
def __init__(self):
|
||||
self.last_stats = time.time()
|
||||
self.last_flush = time.time()
|
||||
self.buff = {}
|
||||
self.redis = get_redis_client()
|
||||
self.subsystem_metrics = s_metrics.CallbackReceiverMetrics(auto_pipe_execute=False)
|
||||
@@ -86,7 +98,7 @@ class CallbackBrokerWorker(BaseWorker):
|
||||
"""This needs to be obtained after forking, or else it will give the parent process"""
|
||||
return os.getpid()
|
||||
|
||||
def read(self, queue):
|
||||
def read(self):
|
||||
has_redis_error = False
|
||||
try:
|
||||
res = self.redis.blpop(self.queue_name, timeout=1)
|
||||
@@ -149,10 +161,37 @@ class CallbackBrokerWorker(BaseWorker):
|
||||
filepath = self.prof.stop()
|
||||
logger.error(f'profiling is disabled, wrote {filepath}')
|
||||
|
||||
def work_loop(self, *args, **kw):
|
||||
def work_loop(self, idx, *args):
|
||||
if settings.AWX_CALLBACK_PROFILE:
|
||||
signal.signal(signal.SIGUSR1, self.toggle_profiling)
|
||||
return super(CallbackBrokerWorker, self).work_loop(*args, **kw)
|
||||
|
||||
ppid = os.getppid()
|
||||
signal_handler = WorkerSignalHandler()
|
||||
set_connection_name('worker') # set application_name to distinguish from other dispatcher processes
|
||||
while not signal_handler.kill_now:
|
||||
# if the parent PID changes, this process has been orphaned
|
||||
# via e.g., segfault or sigkill, we should exit too
|
||||
if os.getppid() != ppid:
|
||||
break
|
||||
try:
|
||||
body = self.read() # this is only for the callback, only reading from redis.
|
||||
if body == 'QUIT':
|
||||
break
|
||||
except QueueEmpty:
|
||||
continue
|
||||
except Exception:
|
||||
logger.exception("Exception on worker {}, reconnecting: ".format(idx))
|
||||
continue
|
||||
try:
|
||||
for conn in db.connections.all():
|
||||
# If the database connection has a hiccup during the prior message, close it
|
||||
# so we can establish a new connection
|
||||
conn.close_if_unusable_or_obsolete()
|
||||
self.perform_work(body, *args)
|
||||
except Exception:
|
||||
logger.exception(f'Unhandled exception in perform_work in worker pid={os.getpid()}')
|
||||
|
||||
logger.debug('worker exiting gracefully pid:{}'.format(os.getpid()))
|
||||
|
||||
def flush(self, force=False):
|
||||
now = tz_now()
|
||||
|
||||
@@ -1,144 +1,49 @@
|
||||
import inspect
|
||||
import logging
|
||||
import importlib
|
||||
import sys
|
||||
import traceback
|
||||
import time
|
||||
|
||||
from kubernetes.config import kube_config
|
||||
|
||||
from django.conf import settings
|
||||
from django_guid import set_guid
|
||||
|
||||
from awx.main.tasks.system import dispatch_startup, inform_cluster_of_shutdown
|
||||
|
||||
from .base import BaseWorker
|
||||
|
||||
logger = logging.getLogger('awx.main.dispatch')
|
||||
|
||||
|
||||
class TaskWorker(BaseWorker):
|
||||
def resolve_callable(task):
|
||||
"""
|
||||
A worker implementation that deserializes task messages and runs native
|
||||
Python code.
|
||||
|
||||
The code that *builds* these types of messages is found in
|
||||
`awx.main.dispatch.publish`.
|
||||
Transform a dotted notation task into an imported, callable function, e.g.,
|
||||
awx.main.tasks.system.delete_inventory
|
||||
awx.main.tasks.jobs.RunProjectUpdate
|
||||
"""
|
||||
if not task.startswith('awx.'):
|
||||
raise ValueError('{} is not a valid awx task'.format(task))
|
||||
module, target = task.rsplit('.', 1)
|
||||
module = importlib.import_module(module)
|
||||
_call = None
|
||||
if hasattr(module, target):
|
||||
_call = getattr(module, target, None)
|
||||
if not (hasattr(_call, 'apply_async') and hasattr(_call, 'delay')):
|
||||
raise ValueError('{} is not decorated with @task()'.format(task))
|
||||
return _call
|
||||
|
||||
@staticmethod
|
||||
def resolve_callable(task):
|
||||
"""
|
||||
Transform a dotted notation task into an imported, callable function, e.g.,
|
||||
|
||||
awx.main.tasks.system.delete_inventory
|
||||
awx.main.tasks.jobs.RunProjectUpdate
|
||||
"""
|
||||
if not task.startswith('awx.'):
|
||||
raise ValueError('{} is not a valid awx task'.format(task))
|
||||
module, target = task.rsplit('.', 1)
|
||||
module = importlib.import_module(module)
|
||||
_call = None
|
||||
if hasattr(module, target):
|
||||
_call = getattr(module, target, None)
|
||||
if not (hasattr(_call, 'apply_async') and hasattr(_call, 'delay')):
|
||||
raise ValueError('{} is not decorated with @task()'.format(task))
|
||||
|
||||
return _call
|
||||
|
||||
@staticmethod
|
||||
def run_callable(body):
|
||||
"""
|
||||
Given some AMQP message, import the correct Python code and run it.
|
||||
"""
|
||||
task = body['task']
|
||||
uuid = body.get('uuid', '<unknown>')
|
||||
args = body.get('args', [])
|
||||
kwargs = body.get('kwargs', {})
|
||||
if 'guid' in body:
|
||||
set_guid(body.pop('guid'))
|
||||
_call = TaskWorker.resolve_callable(task)
|
||||
if inspect.isclass(_call):
|
||||
# the callable is a class, e.g., RunJob; instantiate and
|
||||
# return its `run()` method
|
||||
_call = _call().run
|
||||
|
||||
log_extra = ''
|
||||
logger_method = logger.debug
|
||||
if ('time_ack' in body) and ('time_pub' in body):
|
||||
time_publish = body['time_ack'] - body['time_pub']
|
||||
time_waiting = time.time() - body['time_ack']
|
||||
if time_waiting > 5.0 or time_publish > 5.0:
|
||||
# If the task took a very long time to process, add this information to the log
|
||||
log_extra = f' took {time_publish:.4f} to ack, {time_waiting:.4f} in local dispatcher'
|
||||
logger_method = logger.info
|
||||
# don't print kwargs, they often contain launch-time secrets
|
||||
logger_method(f'task {uuid} starting {task}(*{args}){log_extra}')
|
||||
|
||||
return _call(*args, **kwargs)
|
||||
|
||||
def perform_work(self, body):
|
||||
"""
|
||||
Import and run code for a task e.g.,
|
||||
|
||||
body = {
|
||||
'args': [8],
|
||||
'callbacks': [{
|
||||
'args': [],
|
||||
'kwargs': {}
|
||||
'task': u'awx.main.tasks.system.handle_work_success'
|
||||
}],
|
||||
'errbacks': [{
|
||||
'args': [],
|
||||
'kwargs': {},
|
||||
'task': 'awx.main.tasks.system.handle_work_error'
|
||||
}],
|
||||
'kwargs': {},
|
||||
'task': u'awx.main.tasks.jobs.RunProjectUpdate'
|
||||
}
|
||||
"""
|
||||
settings.__clean_on_fork__()
|
||||
result = None
|
||||
try:
|
||||
result = self.run_callable(body)
|
||||
except Exception as exc:
|
||||
result = exc
|
||||
|
||||
try:
|
||||
if getattr(exc, 'is_awx_task_error', False):
|
||||
# Error caused by user / tracked in job output
|
||||
logger.warning("{}".format(exc))
|
||||
else:
|
||||
task = body['task']
|
||||
args = body.get('args', [])
|
||||
kwargs = body.get('kwargs', {})
|
||||
logger.exception('Worker failed to run task {}(*{}, **{}'.format(task, args, kwargs))
|
||||
except Exception:
|
||||
# It's fairly critical that this code _not_ raise exceptions on logging
|
||||
# If you configure external logging in a way that _it_ fails, there's
|
||||
# not a lot we can do here; sys.stderr.write is a final hail mary
|
||||
_, _, tb = sys.exc_info()
|
||||
traceback.print_tb(tb)
|
||||
|
||||
for callback in body.get('errbacks', []) or []:
|
||||
callback['uuid'] = body['uuid']
|
||||
self.perform_work(callback)
|
||||
finally:
|
||||
# It's frustrating that we have to do this, but the python k8s
|
||||
# client leaves behind cacert files in /tmp, so we must clean up
|
||||
# the tmpdir per-dispatcher process every time a new task comes in
|
||||
try:
|
||||
kube_config._cleanup_temp_files()
|
||||
except Exception:
|
||||
logger.exception('failed to cleanup k8s client tmp files')
|
||||
|
||||
for callback in body.get('callbacks', []) or []:
|
||||
callback['uuid'] = body['uuid']
|
||||
self.perform_work(callback)
|
||||
return result
|
||||
|
||||
def on_start(self):
|
||||
dispatch_startup()
|
||||
|
||||
def on_stop(self):
|
||||
inform_cluster_of_shutdown()
|
||||
def run_callable(body):
    """
    Given some AMQP message, import the correct Python code and run it.

    `body` must contain 'task' (dotted path handed to `resolve_callable`) and
    may contain 'uuid', 'args', 'kwargs', 'guid' and 'time_pub'. A 'guid'
    entry is removed from `body` and passed to `set_guid`. Returns whatever
    the resolved callable returns.
    """
    task_name = body['task']
    task_uuid = body.get('uuid', '<unknown>')
    call_args = body.get('args', [])
    call_kwargs = body.get('kwargs', {})
    if 'guid' in body:
        set_guid(body.pop('guid'))
    target = resolve_callable(task_name)

    delay_note = ''
    emit = logger.debug
    if 'time_pub' in body:
        elapsed = time.time() - body['time_pub']
        if elapsed > 5.0:
            # The message took a long time to arrive: say so, and raise the log level
            delay_note = f' took {elapsed:.4f} to send message'
            emit = logger.info

    # don't print kwargs, they often contain launch-time secrets
    emit(f'task {task_uuid} starting {task_name}(*{call_args}){delay_note}')
    return target(*call_args, **call_kwargs)
|
||||
|
||||
@@ -428,6 +428,9 @@ class CredentialInputField(JSONSchemaField):
|
||||
# determine the defined fields for the associated credential type
|
||||
properties = {}
|
||||
for field in model_instance.credential_type.inputs.get('fields', []):
|
||||
# Prevent users from providing values for internally resolved fields
|
||||
if 'internal' in field:
|
||||
continue
|
||||
field = field.copy()
|
||||
properties[field['id']] = field
|
||||
if field.get('choices', []):
|
||||
@@ -566,6 +569,7 @@ class CredentialTypeInputField(JSONSchemaField):
|
||||
},
|
||||
'label': {'type': 'string'},
|
||||
'help_text': {'type': 'string'},
|
||||
'internal': {'type': 'boolean'},
|
||||
'multiline': {'type': 'boolean'},
|
||||
'secret': {'type': 'boolean'},
|
||||
'ask_at_runtime': {'type': 'boolean'},
|
||||
|
||||
88
awx/main/management/commands/dispatcherctl.py
Normal file
88
awx/main/management/commands/dispatcherctl.py
Normal file
@@ -0,0 +1,88 @@
|
||||
import argparse
|
||||
import inspect
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
import yaml
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.db import connection
|
||||
|
||||
from dispatcherd.cli import (
|
||||
CONTROL_ARG_SCHEMAS,
|
||||
DEFAULT_CONFIG_FILE,
|
||||
_base_cli_parent,
|
||||
_control_common_parent,
|
||||
_register_control_arguments,
|
||||
_build_command_data_from_args,
|
||||
)
|
||||
from dispatcherd.config import setup as dispatcher_setup
|
||||
from dispatcherd.factories import get_control_from_settings
|
||||
from dispatcherd.service import control_tasks
|
||||
|
||||
from awx.main.dispatch.config import get_dispatcherd_config
|
||||
from awx.main.management.commands.dispatcherd import ensure_no_dispatcherd_env_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that sends dispatcherd control commands and prints the replies."""

    help = 'Dispatcher control operations'

    def add_arguments(self, parser):
        """Copy the dispatcherd CLI arguments onto `parser` and add one sub-command per control task."""
        parser.description = 'Run dispatcherd control commands using awx-manage.'
        shared_parents = [_base_cli_parent(), _control_common_parent()]
        for parent in shared_parents:
            parser._add_container_actions(parent)

        subparsers = parser.add_subparsers(dest='command', metavar='command')
        subparsers.required = True
        for command in control_tasks.__all__:
            # The task's docstring doubles as the sub-command description;
            # its first line is the short help text.
            doc = inspect.getdoc(getattr(control_tasks, command, None)) or ''
            command_parser = subparsers.add_parser(
                command,
                help=doc.splitlines()[0] if doc else None,
                description=doc,
                parents=shared_parents,
            )
            _register_control_arguments(command_parser, CONTROL_ARG_SCHEMAS.get(command))

    def handle(self, *args, **options):
        """
        Run one control command and write the YAML-formatted replies to stdout.

        Raises CommandError when no command is given, when a non-default
        config path is supplied, when the database vendor is sqlite, or when
        fewer replies than `expected_replies` come back.
        """
        command = options.pop('command', None)
        if not command:
            raise CommandError('No dispatcher control command specified')

        # Everything still left in `options` after these pops is handed to
        # _build_command_data_from_args as the command's own arguments.
        for django_opt in ('verbosity', 'traceback', 'no_color', 'force_color', 'skip_checks'):
            options.pop(django_opt, None)
        log_level = options.pop('log_level', 'DEBUG')
        config_path = os.path.abspath(options.pop('config', DEFAULT_CONFIG_FILE))
        expected_replies = options.pop('expected_replies', 1)

        logging.basicConfig(level=getattr(logging, log_level), stream=sys.stdout)
        logger.debug(f"Configured standard out logging at {log_level} level")

        ensure_no_dispatcherd_env_config()
        if config_path != os.path.abspath(DEFAULT_CONFIG_FILE):
            raise CommandError('The config path CLI option is not allowed for the awx-manage command')
        if connection.vendor == 'sqlite':
            raise CommandError('dispatcherctl is not supported with sqlite3; use a PostgreSQL database')
        logger.info('Using config generated from awx.main.dispatch.config.get_dispatcherd_config')
        dispatcher_setup(get_dispatcherd_config())

        data = _build_command_data_from_args(argparse.Namespace(**options), command)
        replies = get_control_from_settings().control_with_reply(command, data=data, expected_replies=expected_replies)
        self.stdout.write(yaml.dump(replies, default_flow_style=False))
        if len(replies) < expected_replies:
            logger.error(f'Obtained only {len(replies)} of {expected_replies}, exiting with non-zero code')
            raise CommandError('dispatcherctl returned fewer replies than expected')
|
||||
85
awx/main/management/commands/dispatcherd.py
Normal file
85
awx/main/management/commands/dispatcherd.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# Copyright (c) 2015 Ansible, Inc.
|
||||
# All Rights Reserved
|
||||
import copy
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import logging.config
|
||||
import os
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache as django_cache
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.db import connection
|
||||
|
||||
from dispatcherd.config import setup as dispatcher_setup
|
||||
|
||||
from awx.main.dispatch.config import get_dispatcherd_config
|
||||
|
||||
logger = logging.getLogger('awx.main.dispatch')
|
||||
|
||||
|
||||
from dispatcherd import run_service
|
||||
|
||||
|
||||
def _json_default(value):
    """
    Fallback serializer passed as `default=` to `json.dumps`.

    Sets and frozensets become sorted lists so that the output does not depend
    on iteration order, tuples become lists, and anything else is rendered
    with `str()`.
    """
    if isinstance(value, (set, frozenset)):
        try:
            return sorted(value)
        except TypeError:
            # Members that cannot be compared with each other (e.g. {1, 'a'})
            # are ordered by their repr instead of failing the serialization.
            return sorted(value, key=repr)
    if isinstance(value, tuple):
        return list(value)
    return str(value)
|
||||
|
||||
|
||||
def _hash_config(config):
    """Return the SHA-256 hex digest of a compact, key-sorted JSON rendering of `config`."""
    canonical = json.dumps(
        config,
        default=_json_default,
        separators=(',', ':'),
        sort_keys=True,
    )
    digest = hashlib.sha256()
    digest.update(canonical.encode('utf-8'))
    return digest.hexdigest()
|
||||
|
||||
|
||||
def ensure_no_dispatcherd_env_config():
    """Raise CommandError if the DISPATCHERD_CONFIG_FILE environment variable is set to a non-empty value."""
    if not os.environ.get('DISPATCHERD_CONFIG_FILE'):
        return
    raise CommandError('DISPATCHERD_CONFIG_FILE is set but awx-manage dispatcherd uses dynamic config from code')
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that configures dispatcherd from code and runs its service."""

    help = (
        'Run the background task service, this is the supported entrypoint since the introduction of dispatcherd as a library. '
        'This replaces the prior awx-manage run_dispatcher service, and control actions are at awx-manage dispatcherctl.'
    )

    def add_arguments(self, parser):
        """This command defines no arguments of its own."""
        return

    def handle(self, *arg, **options):
        """Set up logging and the dispatcherd config, then run the service."""
        ensure_no_dispatcherd_env_config()

        self.configure_dispatcher_logging()
        config = get_dispatcherd_config(for_service=True)
        logger.info(
            'Using dispatcherd config generated from awx.main.dispatch.config.get_dispatcherd_config (sha256=%s)',
            _hash_config(config),
        )

        # Close the connection, because the pg_notify broker will create new async connection
        connection.close()
        django_cache.close()
        dispatcher_setup(config)

        run_service()

    def configure_dispatcher_logging(self):
        """Apply a copy of settings.LOGGING with the parent-process adjustments made below."""
        # Apply special log rule for the parent process
        special_logging = copy.deepcopy(settings.LOGGING)
        changed_handlers = []
        for handler_name, handler_config in special_logging.get('handlers', {}).items():
            current_filters = handler_config.get('filters', [])
            if 'dynamic_level_filter' not in current_filters:
                continue
            handler_config['filters'] = [flt for flt in current_filters if flt != 'dynamic_level_filter']
            changed_handlers.append(handler_name)
        logger.info(f'Dispatcherd main process replaced log level filter for handlers: {changed_handlers}')

        # Apply the custom logging level here, before the asyncio code starts
        dispatcherd_logger = special_logging.setdefault('loggers', {}).setdefault('dispatcherd', {})
        dispatcherd_logger['level'] = settings.LOG_AGGREGATOR_LEVEL

        logging.config.dictConfig(special_logging)
|
||||
@@ -4,7 +4,7 @@ import json
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from awx.main.dispatch import pg_bus_conn
|
||||
from awx.main.dispatch.worker.task import TaskWorker
|
||||
from awx.main.dispatch.worker.task import run_callable
|
||||
|
||||
logger = logging.getLogger('awx.main.cache_clear')
|
||||
|
||||
@@ -21,11 +21,11 @@ class Command(BaseCommand):
|
||||
try:
|
||||
with pg_bus_conn() as conn:
|
||||
conn.listen("tower_settings_change")
|
||||
for e in conn.events(yield_timeouts=True):
|
||||
for e in conn.events():
|
||||
if e is not None:
|
||||
body = json.loads(e.payload)
|
||||
logger.info(f"Cache clear request received. Clearing now, payload: {e.payload}")
|
||||
TaskWorker.run_callable(body)
|
||||
run_callable(body)
|
||||
|
||||
except Exception:
|
||||
# Log unanticipated exception in addition to writing to stderr to get timestamps and other metadata
|
||||
|
||||
@@ -3,13 +3,12 @@
|
||||
|
||||
import redis
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
import redis.exceptions
|
||||
|
||||
from awx.main.analytics.subsystem_metrics import CallbackReceiverMetricsServer
|
||||
from awx.main.dispatch.control import Control
|
||||
from awx.main.dispatch.worker import AWXConsumerRedis, CallbackBrokerWorker
|
||||
from awx.main.utils.redis import get_redis_client
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
@@ -26,7 +25,7 @@ class Command(BaseCommand):
|
||||
|
||||
def handle(self, *arg, **options):
|
||||
if options.get('status'):
|
||||
print(Control('callback_receiver').status())
|
||||
print(self.status())
|
||||
return
|
||||
consumer = None
|
||||
|
||||
@@ -36,13 +35,16 @@ class Command(BaseCommand):
|
||||
raise CommandError(f'Callback receiver could not connect to redis, error: {exc}')
|
||||
|
||||
try:
|
||||
consumer = AWXConsumerRedis(
|
||||
'callback_receiver',
|
||||
CallbackBrokerWorker(),
|
||||
queues=[getattr(settings, 'CALLBACK_QUEUE', '')],
|
||||
)
|
||||
consumer = AWXConsumerRedis('callback_receiver', CallbackBrokerWorker())
|
||||
consumer.run()
|
||||
except KeyboardInterrupt:
|
||||
print('Terminating Callback Receiver')
|
||||
if consumer:
|
||||
consumer.stop()
|
||||
|
||||
def status(self, *args, **kwargs):
    """
    Return the statistics stored in redis for the callback receiver workers.

    Reads every key matching 'awx_callback_receiver_statistics_*' and joins
    the decoded values with newlines. Positional and keyword arguments are
    accepted but unused.
    """
    client = get_redis_client()
    reports = []
    for key in client.keys('awx_callback_receiver_statistics_*'):
        raw = client.get(key)
        if raw is None:
            # The key no longer exists by the time it is read (GET returns
            # None for a missing key); skip it rather than fail on .decode().
            continue
        reports.append(raw.decode('utf-8'))
    return '\n'.join(reports)
|
||||
|
||||
@@ -1,46 +1,24 @@
|
||||
# Copyright (c) 2015 Ansible, Inc.
|
||||
# All Rights Reserved.
|
||||
import logging
|
||||
import logging.config
|
||||
|
||||
import yaml
|
||||
import copy
|
||||
|
||||
import redis
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import connection
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.core.cache import cache as django_cache
|
||||
|
||||
from flags.state import flag_enabled
|
||||
from django.core.management.base import CommandError
|
||||
|
||||
from dispatcherd.factories import get_control_from_settings
|
||||
from dispatcherd import run_service
|
||||
from dispatcherd.config import setup as dispatcher_setup
|
||||
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
from awx.main.dispatch.config import get_dispatcherd_config
|
||||
from awx.main.dispatch.control import Control
|
||||
from awx.main.dispatch.pool import AutoscalePool
|
||||
from awx.main.dispatch.worker import AWXConsumerPG, TaskWorker
|
||||
from awx.main.analytics.subsystem_metrics import DispatcherMetricsServer
|
||||
from awx.main.management.commands.dispatcherd import Command as DispatcherdCommand
|
||||
|
||||
logger = logging.getLogger('awx.main.dispatch')
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = 'Launch the task dispatcher'
|
||||
class Command(DispatcherdCommand):
|
||||
help = 'Launch the task dispatcher (deprecated; use awx-manage dispatcherd)'
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument('--status', dest='status', action='store_true', help='print the internal state of any running dispatchers')
|
||||
parser.add_argument('--schedule', dest='schedule', action='store_true', help='print the current status of schedules being ran by dispatcher')
|
||||
parser.add_argument('--running', dest='running', action='store_true', help='print the UUIDs of any tasked managed by this dispatcher')
|
||||
parser.add_argument(
|
||||
'--reload',
|
||||
dest='reload',
|
||||
action='store_true',
|
||||
help=('cause the dispatcher to recycle all of its worker processes; running jobs will run to completion first'),
|
||||
)
|
||||
parser.add_argument(
|
||||
'--cancel',
|
||||
dest='cancel',
|
||||
@@ -50,41 +28,22 @@ class Command(BaseCommand):
|
||||
'Only running tasks can be canceled, queued tasks must be started before they can be canceled.'
|
||||
),
|
||||
)
|
||||
super().add_arguments(parser)
|
||||
|
||||
def handle(self, *arg, **options):
|
||||
def handle(self, *args, **options):
|
||||
logger.warning('awx-manage run_dispatcher is deprecated; use awx-manage dispatcherd')
|
||||
if options.get('status'):
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
ctl = get_control_from_settings()
|
||||
running_data = ctl.control_with_reply('status')
|
||||
if len(running_data) != 1:
|
||||
raise CommandError('Did not receive expected number of replies')
|
||||
print(yaml.dump(running_data[0], default_flow_style=False))
|
||||
return
|
||||
else:
|
||||
print(Control('dispatcher').status())
|
||||
return
|
||||
if options.get('schedule'):
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
print('NOT YET IMPLEMENTED')
|
||||
return
|
||||
else:
|
||||
print(Control('dispatcher').schedule())
|
||||
ctl = get_control_from_settings()
|
||||
running_data = ctl.control_with_reply('status')
|
||||
if len(running_data) != 1:
|
||||
raise CommandError('Did not receive expected number of replies')
|
||||
print(yaml.dump(running_data[0], default_flow_style=False))
|
||||
return
|
||||
if options.get('running'):
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
ctl = get_control_from_settings()
|
||||
running_data = ctl.control_with_reply('running')
|
||||
print(yaml.dump(running_data, default_flow_style=False))
|
||||
return
|
||||
else:
|
||||
print(Control('dispatcher').running())
|
||||
return
|
||||
if options.get('reload'):
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
print('NOT YET IMPLEMENTED')
|
||||
return
|
||||
else:
|
||||
return Control('dispatcher').control({'control': 'reload'})
|
||||
ctl = get_control_from_settings()
|
||||
running_data = ctl.control_with_reply('running')
|
||||
print(yaml.dump(running_data, default_flow_style=False))
|
||||
return
|
||||
if options.get('cancel'):
|
||||
cancel_str = options.get('cancel')
|
||||
try:
|
||||
@@ -94,56 +53,12 @@ class Command(BaseCommand):
|
||||
if not isinstance(cancel_data, list):
|
||||
cancel_data = [cancel_str]
|
||||
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
ctl = get_control_from_settings()
|
||||
results = []
|
||||
for task_id in cancel_data:
|
||||
# For each task UUID, send an individual cancel command
|
||||
result = ctl.control_with_reply('cancel', data={'uuid': task_id})
|
||||
results.append(result)
|
||||
print(yaml.dump(results, default_flow_style=False))
|
||||
return
|
||||
else:
|
||||
print(Control('dispatcher').cancel(cancel_data))
|
||||
return
|
||||
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
self.configure_dispatcher_logging()
|
||||
|
||||
# Close the connection, because the pg_notify broker will create new async connection
|
||||
connection.close()
|
||||
django_cache.close()
|
||||
|
||||
dispatcher_setup(get_dispatcherd_config(for_service=True))
|
||||
run_service()
|
||||
else:
|
||||
consumer = None
|
||||
|
||||
try:
|
||||
DispatcherMetricsServer().start()
|
||||
except redis.exceptions.ConnectionError as exc:
|
||||
raise CommandError(f'Dispatcher could not connect to redis, error: {exc}')
|
||||
|
||||
try:
|
||||
queues = ['tower_broadcast_all', 'tower_settings_change', get_task_queuename()]
|
||||
consumer = AWXConsumerPG('dispatcher', TaskWorker(), queues, AutoscalePool(min_workers=4), schedule=settings.CELERYBEAT_SCHEDULE)
|
||||
consumer.run()
|
||||
except KeyboardInterrupt:
|
||||
logger.debug('Terminating Task Dispatcher')
|
||||
if consumer:
|
||||
consumer.stop()
|
||||
|
||||
def configure_dispatcher_logging(self):
|
||||
# Apply special log rule for the parent process
|
||||
special_logging = copy.deepcopy(settings.LOGGING)
|
||||
for handler_name, handler_config in special_logging.get('handlers', {}).items():
|
||||
filters = handler_config.get('filters', [])
|
||||
if 'dynamic_level_filter' in filters:
|
||||
handler_config['filters'] = [flt for flt in filters if flt != 'dynamic_level_filter']
|
||||
logger.info(f'Dispatcherd main process replaced log level filter for {handler_name} handler')
|
||||
|
||||
# Apply the custom logging level here, before the asyncio code starts
|
||||
special_logging.setdefault('loggers', {}).setdefault('dispatcherd', {})
|
||||
special_logging['loggers']['dispatcherd']['level'] = settings.LOG_AGGREGATOR_LEVEL
|
||||
|
||||
logging.config.dictConfig(special_logging)
|
||||
ctl = get_control_from_settings()
|
||||
results = []
|
||||
for task_id in cancel_data:
|
||||
# For each task UUID, send an individual cancel command
|
||||
result = ctl.control_with_reply('cancel', data={'uuid': task_id})
|
||||
results.append(result)
|
||||
print(yaml.dump(results, default_flow_style=False))
|
||||
return
|
||||
return super().handle(*args, **options)
|
||||
|
||||
@@ -5,7 +5,7 @@ from django.core.management.base import BaseCommand
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
from awx.main.dispatch import pg_bus_conn
|
||||
from awx.main.dispatch.worker.task import TaskWorker
|
||||
from awx.main.dispatch.worker.task import run_callable
|
||||
from awx.main.utils.external_logging import reconfigure_rsyslog
|
||||
|
||||
logger = logging.getLogger('awx.main.rsyslog_configurer')
|
||||
@@ -26,7 +26,7 @@ class Command(BaseCommand):
|
||||
conn.listen("rsyslog_configurer")
|
||||
# reconfigure rsyslog on start up
|
||||
reconfigure_rsyslog()
|
||||
for e in conn.events(yield_timeouts=True):
|
||||
for e in conn.events():
|
||||
if e is not None:
|
||||
logger.info("Change in logging settings found. Restarting rsyslogd")
|
||||
# clear the cache of relevant settings then restart
|
||||
@@ -34,7 +34,7 @@ class Command(BaseCommand):
|
||||
cache.delete_many(setting_keys)
|
||||
settings._awx_conf_memoizedcache.clear()
|
||||
body = json.loads(e.payload)
|
||||
TaskWorker.run_callable(body)
|
||||
run_callable(body)
|
||||
except Exception:
|
||||
# Log unanticipated exception in addition to writing to stderr to get timestamps and other metadata
|
||||
logger.exception('Encountered unhandled error in rsyslog_configurer main loop')
|
||||
|
||||
@@ -21,6 +21,6 @@ class Migration(migrations.Migration):
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunPython(setup_tower_managed_defaults),
|
||||
migrations.RunPython(setup_rbac_role_system_administrator),
|
||||
migrations.RunPython(setup_tower_managed_defaults, migrations.RunPython.noop),
|
||||
migrations.RunPython(setup_rbac_role_system_administrator, migrations.RunPython.noop),
|
||||
]
|
||||
|
||||
@@ -98,5 +98,5 @@ class Migration(migrations.Migration):
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunPython(convert_controller_role_definitions),
|
||||
migrations.RunPython(convert_controller_role_definitions, migrations.RunPython.noop),
|
||||
]
|
||||
|
||||
@@ -3,19 +3,15 @@ from django.db import migrations, models
|
||||
from awx.main.migrations._create_system_jobs import delete_clear_tokens_sjt
|
||||
|
||||
|
||||
# --- START of function merged from 0203_rename_github_app_kind.py ---
|
||||
def update_github_app_kind(apps, schema_editor):
|
||||
"""
|
||||
Updates the 'kind' field for CredentialType records
|
||||
Updates the 'namespace' field for CredentialType records
|
||||
from 'github_app' to 'github_app_lookup'.
|
||||
This addresses a change in the entry point key for the GitHub App plugin.
|
||||
"""
|
||||
CredentialType = apps.get_model('main', 'CredentialType')
|
||||
db_alias = schema_editor.connection.alias
|
||||
CredentialType.objects.using(db_alias).filter(kind='github_app').update(kind='github_app_lookup')
|
||||
|
||||
|
||||
# --- END of function merged from 0203_rename_github_app_kind.py ---
|
||||
CredentialType.objects.using(db_alias).filter(namespace='github_app').update(namespace='github_app_lookup')
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
@@ -118,7 +114,5 @@ class Migration(migrations.Migration):
|
||||
max_length=32,
|
||||
),
|
||||
),
|
||||
# --- START of operations merged from 0203_rename_github_app_kind.py ---
|
||||
migrations.RunPython(update_github_app_kind, migrations.RunPython.noop),
|
||||
# --- END of operations merged from 0203_rename_github_app_kind.py ---
|
||||
]
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
# Generated by Django 5.2.8 on 2026-02-20 03:39
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('main', '0204_squashed_deletions'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='instancegroup',
|
||||
options={
|
||||
'default_permissions': ('change', 'delete', 'view'),
|
||||
'ordering': ('pk',),
|
||||
'permissions': [('use_instancegroup', 'Can use instance group in a preference list of a resource')],
|
||||
},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='workflowjobnode',
|
||||
options={'ordering': ('pk',)},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='workflowjobtemplatenode',
|
||||
options={'ordering': ('pk',)},
|
||||
),
|
||||
]
|
||||
@@ -386,7 +386,6 @@ class gce(PluginFileInjector):
|
||||
# auth related items
|
||||
ret['auth_kind'] = "serviceaccount"
|
||||
|
||||
filters = []
|
||||
# TODO: implement gce group_by options
|
||||
# gce never processed the group_by field, if it had, we would selectively
|
||||
# apply those options here, but it did not, so all groups are added here
|
||||
@@ -420,8 +419,6 @@ class gce(PluginFileInjector):
|
||||
|
||||
if keyed_groups:
|
||||
ret['keyed_groups'] = keyed_groups
|
||||
if filters:
|
||||
ret['filters'] = filters
|
||||
if compose_dict:
|
||||
ret['compose'] = compose_dict
|
||||
if inventory_source.source_regions and 'all' not in inventory_source.source_regions:
|
||||
|
||||
@@ -315,12 +315,11 @@ class PrimordialModel(HasEditsMixin, CreatedModifiedModel):
|
||||
)
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
r = super(PrimordialModel, self).__init__(*args, **kwargs)
|
||||
super(PrimordialModel, self).__init__(*args, **kwargs)
|
||||
if self.pk:
|
||||
self._prior_values_store = self._get_fields_snapshot()
|
||||
else:
|
||||
self._prior_values_store = {}
|
||||
return r
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
update_fields = kwargs.get('update_fields', [])
|
||||
|
||||
@@ -28,6 +28,7 @@ from rest_framework.serializers import ValidationError as DRFValidationError
|
||||
from ansible_base.lib.utils.db import advisory_lock
|
||||
|
||||
# AWX
|
||||
from awx.main.constants import OIDC_CREDENTIAL_TYPE_NAMESPACES
|
||||
from awx.api.versioning import reverse
|
||||
from awx.main.fields import (
|
||||
ImplicitRoleField,
|
||||
@@ -242,6 +243,29 @@ class Credential(PasswordFieldsModel, CommonModelNameNotUnique, ResourceMixin):
|
||||
needed.append('vault_password')
|
||||
return needed
|
||||
|
||||
@functools.cached_property
|
||||
def context(self):
|
||||
"""
|
||||
Property for storing runtime context during credential resolution.
|
||||
|
||||
The context is a dict keyed by CredentialInputSource PK, where each value
|
||||
is a dict of runtime fields for that input source. Example::
|
||||
|
||||
{
|
||||
<input_source_pk>: {
|
||||
"workload_identity_token": "<jwt_token>"
|
||||
},
|
||||
<another_input_source_pk>: {
|
||||
"workload_identity_token": "<different_jwt_token>"
|
||||
},
|
||||
}
|
||||
|
||||
This structure allows each input source to have its own set of runtime
|
||||
values, avoiding conflicts when a credential has multiple input sources
|
||||
with different configurations (e.g., different JWT audiences).
|
||||
"""
|
||||
return {}
|
||||
|
||||
@cached_property
|
||||
def dynamic_input_fields(self):
|
||||
# if the credential is not yet saved we can't access the input_sources
|
||||
@@ -367,7 +391,7 @@ class Credential(PasswordFieldsModel, CommonModelNameNotUnique, ResourceMixin):
|
||||
def _get_dynamic_input(self, field_name):
|
||||
for input_source in self.input_sources.all():
|
||||
if input_source.input_field_name == field_name:
|
||||
return input_source.get_input_value()
|
||||
return input_source.get_input_value(context=self.context)
|
||||
else:
|
||||
raise ValueError('{} is not a dynamic input field'.format(field_name))
|
||||
|
||||
@@ -435,13 +459,15 @@ class CredentialType(CommonModelNameNotUnique):
|
||||
def from_db(cls, db, field_names, values):
|
||||
instance = super(CredentialType, cls).from_db(db, field_names, values)
|
||||
if instance.managed and instance.namespace and instance.kind != "external":
|
||||
native = ManagedCredentialType.registry[instance.namespace]
|
||||
instance.inputs = native.inputs
|
||||
instance.injectors = native.injectors
|
||||
instance.custom_injectors = getattr(native, 'custom_injectors', None)
|
||||
native = ManagedCredentialType.registry.get(instance.namespace)
|
||||
if native:
|
||||
instance.inputs = native.inputs
|
||||
instance.injectors = native.injectors
|
||||
instance.custom_injectors = getattr(native, 'custom_injectors', None)
|
||||
elif instance.namespace and instance.kind == "external":
|
||||
native = ManagedCredentialType.registry[instance.namespace]
|
||||
instance.inputs = native.inputs
|
||||
native = ManagedCredentialType.registry.get(instance.namespace)
|
||||
if native:
|
||||
instance.inputs = native.inputs
|
||||
|
||||
return instance
|
||||
|
||||
@@ -622,7 +648,15 @@ class CredentialInputSource(PrimordialModel):
|
||||
raise ValidationError(_('Input field must be defined on target credential (options are {}).'.format(', '.join(sorted(defined_fields)))))
|
||||
return self.input_field_name
|
||||
|
||||
def get_input_value(self):
|
||||
def get_input_value(self, context: dict | None = None):
|
||||
"""
|
||||
Retrieve the value from the external credential backend.
|
||||
|
||||
Args:
|
||||
context: Optional runtime context dict passed from the target credential.
|
||||
"""
|
||||
if context is None:
|
||||
context = {}
|
||||
backend = self.source_credential.credential_type.plugin.backend
|
||||
backend_kwargs = {}
|
||||
for field_name, value in self.source_credential.inputs.items():
|
||||
@@ -633,6 +667,17 @@ class CredentialInputSource(PrimordialModel):
|
||||
|
||||
backend_kwargs.update(self.metadata)
|
||||
|
||||
# Resolve internal fields from the per-input-source context.
|
||||
# The context dict is keyed by input source PK, e.g.:
|
||||
# {42: {"workload_identity_token": "eyJ..."}, 43: {"workload_identity_token": "eyX..."}}
|
||||
# This allows each input source to carry its own runtime values.
|
||||
input_source_context = context.get(self.pk, {})
|
||||
for field in self.source_credential.credential_type.inputs.get('fields', []):
|
||||
if field.get('internal'):
|
||||
value = input_source_context.get(field['id'])
|
||||
if value is not None:
|
||||
backend_kwargs[field['id']] = value
|
||||
|
||||
with set_environ(**settings.AWX_TASK_ENV):
|
||||
return backend(**backend_kwargs)
|
||||
|
||||
@@ -641,13 +686,20 @@ class CredentialInputSource(PrimordialModel):
|
||||
return reverse(view_name, kwargs={'pk': self.pk}, request=request)
|
||||
|
||||
|
||||
def load_credentials():
|
||||
def _is_oidc_namespace_disabled(ns):
|
||||
"""Check if a credential namespace should be skipped based on the OIDC feature flag."""
|
||||
return ns in OIDC_CREDENTIAL_TYPE_NAMESPACES and not getattr(settings, 'FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED', False)
|
||||
|
||||
|
||||
def load_credentials():
|
||||
awx_entry_points = {ep.name: ep for ep in entry_points(group='awx_plugins.managed_credentials')}
|
||||
supported_entry_points = {ep.name: ep for ep in entry_points(group='awx_plugins.managed_credentials.supported')}
|
||||
plugin_entry_points = awx_entry_points if detect_server_product_name() == 'AWX' else {**awx_entry_points, **supported_entry_points}
|
||||
|
||||
for ns, ep in plugin_entry_points.items():
|
||||
if _is_oidc_namespace_disabled(ns):
|
||||
continue
|
||||
|
||||
cred_plugin = ep.load()
|
||||
if not hasattr(cred_plugin, 'inputs'):
|
||||
setattr(cred_plugin, 'inputs', {})
|
||||
@@ -666,5 +718,8 @@ def load_credentials():
|
||||
credential_plugins = {}
|
||||
|
||||
for ns, ep in credential_plugins.items():
|
||||
if _is_oidc_namespace_disabled(ns):
|
||||
continue
|
||||
|
||||
plugin = ep.load()
|
||||
CredentialType.load_plugin(ns, plugin)
|
||||
|
||||
@@ -50,9 +50,8 @@ class HasPolicyEditsMixin(HasEditsMixin):
|
||||
abstract = True
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
r = super(BaseModel, self).__init__(*args, **kwargs)
|
||||
super(BaseModel, self).__init__(*args, **kwargs)
|
||||
self._prior_values_store = self._get_fields_snapshot()
|
||||
return r
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
super(BaseModel, self).save(*args, **kwargs)
|
||||
@@ -486,6 +485,7 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin, ResourceMi
|
||||
|
||||
class Meta:
|
||||
app_label = 'main'
|
||||
ordering = ('pk',)
|
||||
permissions = [('use_instancegroup', 'Can use instance group in a preference list of a resource')]
|
||||
# Since this has no direct organization field only superuser can add, so remove add permission
|
||||
default_permissions = ('change', 'delete', 'view')
|
||||
|
||||
@@ -845,6 +845,21 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
|
||||
def get_notification_friendly_name(self):
|
||||
return "Job"
|
||||
|
||||
def get_source_hosts_for_constructed_inventory(self):
|
||||
"""Return a QuerySet of the source (input inventory) hosts for a constructed inventory.
|
||||
|
||||
Constructed inventory hosts have an instance_id pointing to the real
|
||||
host in the input inventory. This resolves those references and returns
|
||||
a proper QuerySet (never a list), suitable for use with finish_fact_cache.
|
||||
"""
|
||||
Host = JobHostSummary._meta.get_field('host').related_model
|
||||
if not self.inventory_id:
|
||||
return Host.objects.none()
|
||||
id_field = Host._meta.get_field('id')
|
||||
return Host.objects.filter(id__in=self.inventory.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field))).only(
|
||||
*HOST_FACTS_FIELDS
|
||||
)
|
||||
|
||||
def get_hosts_for_fact_cache(self):
|
||||
"""
|
||||
Builds the queryset to use for writing or finalizing the fact cache
|
||||
@@ -852,17 +867,15 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
|
||||
For constructed inventories, that means the original (input inventory) hosts
|
||||
when slicing, that means only returning hosts in that slice
|
||||
"""
|
||||
Host = JobHostSummary._meta.get_field('host').related_model
|
||||
if not self.inventory_id:
|
||||
Host = JobHostSummary._meta.get_field('host').related_model
|
||||
return Host.objects.none()
|
||||
|
||||
if self.inventory.kind == 'constructed':
|
||||
id_field = Host._meta.get_field('id')
|
||||
host_qs = Host.objects.filter(id__in=self.inventory.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field)))
|
||||
host_qs = self.get_source_hosts_for_constructed_inventory()
|
||||
else:
|
||||
host_qs = self.inventory.hosts
|
||||
host_qs = self.inventory.hosts.only(*HOST_FACTS_FIELDS)
|
||||
|
||||
host_qs = host_qs.only(*HOST_FACTS_FIELDS)
|
||||
host_qs = self.inventory.get_sliced_hosts(host_qs, self.job_slice_number, self.job_slice_count)
|
||||
return host_qs
|
||||
|
||||
|
||||
@@ -188,6 +188,16 @@ class SurveyJobTemplateMixin(models.Model):
|
||||
runtime_extra_vars.pop(variable_key)
|
||||
|
||||
if default is not None:
|
||||
# do not add variables that contain an empty string, are not required and are not present in extra_vars
|
||||
# password fields must be skipped, because default values have special behaviour
|
||||
if (
|
||||
default == ''
|
||||
and not survey_element.get('required')
|
||||
and survey_element.get('type') != 'password'
|
||||
and variable_key not in runtime_extra_vars
|
||||
):
|
||||
continue
|
||||
|
||||
decrypted_default = default
|
||||
if survey_element['type'] == "password" and isinstance(decrypted_default, str) and decrypted_default.startswith('$encrypted$'):
|
||||
decrypted_default = decrypt_value(get_encryption_key('value', pk=None), decrypted_default)
|
||||
|
||||
@@ -10,11 +10,13 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import subprocess
|
||||
import tempfile
|
||||
from collections import OrderedDict
|
||||
|
||||
# Dispatcher
|
||||
from dispatcherd.factories import get_control_from_settings
|
||||
|
||||
# Django
|
||||
from django.conf import settings
|
||||
from django.db import models, connection, transaction
|
||||
@@ -24,7 +26,6 @@ from django.utils.translation import gettext_lazy as _
|
||||
from django.utils.timezone import now
|
||||
from django.utils.encoding import smart_str
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from flags.state import flag_enabled
|
||||
|
||||
# REST Framework
|
||||
from rest_framework.exceptions import ParseError
|
||||
@@ -39,7 +40,6 @@ from ansible_base.rbac.models import RoleEvaluation
|
||||
# AWX
|
||||
from awx.main.models.base import CommonModelNameNotUnique, PasswordFieldsModel, NotificationFieldsModel
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
from awx.main.dispatch.control import Control as ControlDispatcher
|
||||
from awx.main.registrar import activity_stream_registrar
|
||||
from awx.main.models.mixins import TaskManagerUnifiedJobMixin, ExecutionEnvironmentMixin
|
||||
from awx.main.models.rbac import to_permissions
|
||||
@@ -918,7 +918,7 @@ class UnifiedJob(
|
||||
|
||||
# If we have a start and finished time, and haven't already calculated
|
||||
# out the time that elapsed, do so.
|
||||
if self.started and self.finished and self.elapsed == 0.0:
|
||||
if self.started and self.finished and self.elapsed == decimal.Decimal(0):
|
||||
td = self.finished - self.started
|
||||
elapsed = decimal.Decimal(td.total_seconds())
|
||||
self.elapsed = elapsed.quantize(dq)
|
||||
@@ -1354,8 +1354,6 @@ class UnifiedJob(
|
||||
status_data['instance_group_name'] = None
|
||||
elif status in ['successful', 'failed', 'canceled'] and self.finished:
|
||||
status_data['finished'] = datetime.datetime.strftime(self.finished, "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
elif status == 'running':
|
||||
status_data['started'] = datetime.datetime.strftime(self.finished, "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
status_data.update(self.websocket_emit_data())
|
||||
status_data['group_name'] = 'jobs'
|
||||
if getattr(self, 'unified_job_template_id', None):
|
||||
@@ -1487,53 +1485,17 @@ class UnifiedJob(
|
||||
return 'Previous Task Canceled: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (self.model_to_str(), self.name, self.id)
|
||||
return None
|
||||
|
||||
def fallback_cancel(self):
|
||||
if not self.celery_task_id:
|
||||
self.refresh_from_db(fields=['celery_task_id'])
|
||||
self.cancel_dispatcher_process()
|
||||
|
||||
def cancel_dispatcher_process(self):
|
||||
"""Returns True if dispatcher running this job acknowledged request and sent SIGTERM"""
|
||||
if not self.celery_task_id:
|
||||
return False
|
||||
|
||||
canceled = []
|
||||
# Special case for task manager (used during workflow job cancellation)
|
||||
if not connection.get_autocommit():
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
try:
|
||||
from dispatcherd.factories import get_control_from_settings
|
||||
|
||||
ctl = get_control_from_settings()
|
||||
ctl.control('cancel', data={'uuid': self.celery_task_id})
|
||||
except Exception:
|
||||
logger.exception("Error sending cancel command to new dispatcher")
|
||||
else:
|
||||
try:
|
||||
ControlDispatcher('dispatcher', self.controller_node).cancel([self.celery_task_id], with_reply=False)
|
||||
except Exception:
|
||||
logger.exception("Error sending cancel command to legacy dispatcher")
|
||||
return True # task manager itself needs to act under assumption that cancel was received
|
||||
|
||||
# Standard case with reply
|
||||
try:
|
||||
timeout = 5
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
from dispatcherd.factories import get_control_from_settings
|
||||
|
||||
ctl = get_control_from_settings()
|
||||
results = ctl.control_with_reply('cancel', data={'uuid': self.celery_task_id}, expected_replies=1, timeout=timeout)
|
||||
# Check if cancel was successful by checking if we got any results
|
||||
return bool(results and len(results) > 0)
|
||||
else:
|
||||
# Original implementation
|
||||
canceled = ControlDispatcher('dispatcher', self.controller_node).cancel([self.celery_task_id])
|
||||
except socket.timeout:
|
||||
logger.error(f'could not reach dispatcher on {self.controller_node} within {timeout}s')
|
||||
logger.info(f'Sending cancel message to pg_notify channel {self.controller_node} for task {self.celery_task_id}')
|
||||
ctl = get_control_from_settings(default_publish_channel=self.controller_node)
|
||||
ctl.control('cancel', data={'uuid': self.celery_task_id})
|
||||
except Exception:
|
||||
logger.exception("error encountered when checking task status")
|
||||
|
||||
return bool(self.celery_task_id in canceled) # True or False, whether confirmation was obtained
|
||||
logger.exception("Error sending cancel command to dispatcher")
|
||||
|
||||
def cancel(self, job_explanation=None, is_chain=False):
|
||||
if self.can_cancel:
|
||||
@@ -1556,19 +1518,13 @@ class UnifiedJob(
|
||||
# the job control process will use the cancel_flag to distinguish a shutdown from a cancel
|
||||
self.save(update_fields=cancel_fields)
|
||||
|
||||
controller_notified = False
|
||||
if self.celery_task_id:
|
||||
controller_notified = self.cancel_dispatcher_process()
|
||||
# Be extra sure we have the task id, in case job is transitioning into running right now
|
||||
if not self.celery_task_id:
|
||||
self.refresh_from_db(fields=['celery_task_id', 'controller_node'])
|
||||
|
||||
# If a SIGTERM signal was sent to the control process, and acked by the dispatcher
|
||||
# then we want to let its own cleanup change status, otherwise change status now
|
||||
if not controller_notified:
|
||||
if self.status != 'canceled':
|
||||
self.status = 'canceled'
|
||||
self.save(update_fields=['status'])
|
||||
# Avoid race condition where we have stale model from pending state but job has already started,
|
||||
# its checking signal but not cancel_flag, so re-send signal after updating cancel fields
|
||||
self.fallback_cancel()
|
||||
# send pg_notify message to cancel, will not send until transaction completes
|
||||
if self.celery_task_id:
|
||||
self.cancel_dispatcher_process()
|
||||
|
||||
return self.cancel_flag
|
||||
|
||||
|
||||
@@ -200,6 +200,7 @@ class WorkflowJobTemplateNode(WorkflowNodeBase):
|
||||
indexes = [
|
||||
models.Index(fields=['identifier']),
|
||||
]
|
||||
ordering = ('pk',)
|
||||
|
||||
def get_absolute_url(self, request=None):
|
||||
return reverse('api:workflow_job_template_node_detail', kwargs={'pk': self.pk}, request=request)
|
||||
@@ -286,6 +287,7 @@ class WorkflowJobNode(WorkflowNodeBase):
|
||||
models.Index(fields=["identifier", "workflow_job"]),
|
||||
models.Index(fields=['identifier']),
|
||||
]
|
||||
ordering = ('pk',)
|
||||
|
||||
@property
|
||||
def event_processing_finished(self):
|
||||
@@ -785,7 +787,7 @@ class WorkflowJob(UnifiedJob, WorkflowJobOptions, SurveyJobMixin, JobNotificatio
|
||||
def cancel_dispatcher_process(self):
|
||||
# WorkflowJobs don't _actually_ run anything in the dispatcher, so
|
||||
# there's no point in asking the dispatcher if it knows about this task
|
||||
return True
|
||||
return
|
||||
|
||||
|
||||
class WorkflowApprovalTemplate(UnifiedJobTemplate, RelatedJobsMixin):
|
||||
@@ -916,6 +918,17 @@ class WorkflowApproval(UnifiedJob, JobNotificationMixin):
|
||||
ScheduleWorkflowManager().schedule()
|
||||
return reverse('api:workflow_approval_deny', kwargs={'pk': self.pk}, request=request)
|
||||
|
||||
def cancel(self, job_explanation=None, is_chain=False):
|
||||
# WorkflowApprovals have no dispatcher process (they wait for human
|
||||
# input) and are excluded from TaskManager processing, so the base
|
||||
# cancel() would only set cancel_flag without ever transitioning the
|
||||
# status. We call super() for the flag, then transition directly.
|
||||
has_already_canceled = bool(self.status == 'canceled')
|
||||
super().cancel(job_explanation=job_explanation, is_chain=is_chain)
|
||||
if self.status != 'canceled' and not has_already_canceled:
|
||||
self.status = 'canceled'
|
||||
self.save(update_fields=['status'])
|
||||
|
||||
def signal_start(self, **kwargs):
|
||||
can_start = super(WorkflowApproval, self).signal_start(**kwargs)
|
||||
self.started = self.created
|
||||
|
||||
@@ -76,10 +76,12 @@ class GrafanaBackend(AWXBaseEmailBackend, CustomNotificationBase):
|
||||
grafana_headers = {}
|
||||
if 'started' in m.body:
|
||||
try:
|
||||
epoch = datetime.datetime.utcfromtimestamp(0)
|
||||
grafana_data['time'] = grafana_data['timeEnd'] = int((dp.parse(m.body['started']).replace(tzinfo=None) - epoch).total_seconds() * 1000)
|
||||
epoch = datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc)
|
||||
grafana_data['time'] = grafana_data['timeEnd'] = int(
|
||||
(dp.parse(m.body['started']).replace(tzinfo=datetime.timezone.utc) - epoch).total_seconds() * 1000
|
||||
)
|
||||
if m.body.get('finished'):
|
||||
grafana_data['timeEnd'] = int((dp.parse(m.body['finished']).replace(tzinfo=None) - epoch).total_seconds() * 1000)
|
||||
grafana_data['timeEnd'] = int((dp.parse(m.body['finished']).replace(tzinfo=datetime.timezone.utc) - epoch).total_seconds() * 1000)
|
||||
except ValueError:
|
||||
logger.error(smart_str(_("Error converting time {} or timeEnd {} to int.").format(m.body['started'], m.body['finished'])))
|
||||
if not self.fail_silently:
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Copyright (c) 2016 Ansible, Inc.
|
||||
# All Rights Reserved.
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import requests
|
||||
@@ -84,20 +85,25 @@ class WebhookBackend(AWXBaseEmailBackend, CustomNotificationBase):
|
||||
if resp.status_code not in [301, 307]:
|
||||
break
|
||||
|
||||
# convert the url to a base64 encoded string for safe logging
|
||||
url_log_safe = base64.b64encode(url.encode('UTF-8'))
|
||||
|
||||
# get the next URL to try
|
||||
url_next = resp.headers.get("Location", None)
|
||||
url_next_log_safe = base64.b64encode(url_next.encode('UTF-8')) if url_next else b'None'
|
||||
|
||||
# we've hit a redirect. extract the redirect URL out of the first response header and try again
|
||||
logger.warning(
|
||||
f"Received a {resp.status_code} from {url}, trying to reach redirect url {resp.headers.get('Location', None)}; attempt #{retries+1}"
|
||||
)
|
||||
logger.warning(f"Received a {resp.status_code} from {url_log_safe}, trying to reach redirect url {url_next_log_safe}; attempt #{retries+1}")
|
||||
|
||||
# take the first redirect URL in the response header and try that
|
||||
url = resp.headers.get("Location", None)
|
||||
url = url_next
|
||||
|
||||
if url is None:
|
||||
err = f"Webhook notification received redirect to a blank URL from {url}. Response headers={resp.headers}"
|
||||
err = f"Webhook notification received redirect to a blank URL from {url_log_safe}. Response headers={resp.headers}"
|
||||
break
|
||||
else:
|
||||
# no break condition in the loop encountered; therefore we have hit the maximum number of retries
|
||||
err = f"Webhook notification max number of retries [{self.MAX_RETRIES}] exceeded. Failed to send webhook notification to {url}"
|
||||
err = f"Webhook notification max number of retries [{self.MAX_RETRIES}] exceeded. Failed to send webhook notification to {url_log_safe}"
|
||||
|
||||
if resp.status_code >= 400:
|
||||
err = f"Error sending webhook notification: {resp.status_code}"
|
||||
|
||||
@@ -19,13 +19,8 @@ class ActivityStreamRegistrar(object):
|
||||
pre_delete.connect(activity_stream_delete, sender=model, dispatch_uid=str(self.__class__) + str(model) + "_delete")
|
||||
|
||||
for m2mfield in model._meta.many_to_many:
|
||||
try:
|
||||
m2m_attr = getattr(model, m2mfield.name)
|
||||
m2m_changed.connect(
|
||||
activity_stream_associate, sender=m2m_attr.through, dispatch_uid=str(self.__class__) + str(m2m_attr.through) + "_associate"
|
||||
)
|
||||
except AttributeError:
|
||||
pass
|
||||
m2m_attr = getattr(model, m2mfield.name)
|
||||
m2m_changed.connect(activity_stream_associate, sender=m2m_attr.through, dispatch_uid=str(self.__class__) + str(m2m_attr.through) + "_associate")
|
||||
|
||||
def disconnect(self, model):
|
||||
if model in self.models:
|
||||
|
||||
@@ -48,11 +48,6 @@ class SimpleDAG(object):
|
||||
'''
|
||||
self.node_to_edges_by_label = dict()
|
||||
|
||||
def __contains__(self, obj):
|
||||
if self.node['node_object'] in self.node_obj_to_node_index:
|
||||
return True
|
||||
return False
|
||||
|
||||
def __len__(self):
|
||||
return len(self.nodes)
|
||||
|
||||
|
||||
@@ -122,8 +122,11 @@ class WorkflowDAG(SimpleDAG):
|
||||
if not job:
|
||||
continue
|
||||
elif job.can_cancel:
|
||||
cancel_finished = False
|
||||
job.cancel()
|
||||
# If the job is not yet in a terminal state after .cancel(),
|
||||
# the TaskManager still needs to process it.
|
||||
if job.status not in ('successful', 'failed', 'canceled', 'error'):
|
||||
cancel_finished = False
|
||||
return cancel_finished
|
||||
|
||||
def is_workflow_done(self):
|
||||
|
||||
@@ -19,9 +19,6 @@ from django.utils.timezone import now as tz_now
|
||||
from django.conf import settings
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
|
||||
# django-flags
|
||||
from flags.state import flag_enabled
|
||||
|
||||
from ansible_base.lib.utils.models import get_type_for_model
|
||||
|
||||
# django-ansible-base
|
||||
@@ -199,6 +196,10 @@ class WorkflowManager(TaskBase):
|
||||
workflow_job.start_args = '' # blank field to remove encrypted passwords
|
||||
workflow_job.save(update_fields=['status', 'start_args'])
|
||||
status_changed = True
|
||||
else:
|
||||
# Speed-up: schedule the task manager so it can process the
|
||||
# canceled pending jobs without waiting for the next cycle.
|
||||
ScheduleTaskManager().schedule()
|
||||
else:
|
||||
dnr_nodes = dag.mark_dnr_nodes()
|
||||
WorkflowJobNode.objects.bulk_update(dnr_nodes, ['do_not_run'])
|
||||
@@ -446,17 +447,29 @@ class TaskManager(TaskBase):
|
||||
self.controlplane_ig = self.tm_models.instance_groups.controlplane_ig
|
||||
|
||||
def process_job_dep_failures(self, task):
|
||||
"""If job depends on a job that has failed, mark as failed and handle misc stuff."""
|
||||
"""If job depends on a job that has failed or been canceled, mark as failed.
|
||||
|
||||
Returns True if a dep failure was found, False otherwise.
|
||||
"""
|
||||
for dep in task.dependent_jobs.all():
|
||||
# if we detect a failed or error dependency, go ahead and fail this task.
|
||||
if dep.status in ("error", "failed"):
|
||||
# if we detect a failed, error, or canceled dependency, go ahead and fail this task.
|
||||
if dep.status in ("error", "failed", "canceled"):
|
||||
task.status = 'failed'
|
||||
logger.warning(f'Previous task failed task: {task.id} dep: {dep.id} task manager')
|
||||
task.job_explanation = 'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (
|
||||
get_type_for_model(type(dep)),
|
||||
dep.name,
|
||||
dep.id,
|
||||
)
|
||||
if dep.status == 'canceled':
|
||||
logger.warning(f'Previous task canceled, failing task: {task.id} dep: {dep.id} task manager')
|
||||
task.job_explanation = 'Previous Task Canceled: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (
|
||||
get_type_for_model(type(dep)),
|
||||
dep.name,
|
||||
dep.id,
|
||||
)
|
||||
ScheduleWorkflowManager().schedule() # speedup for dependency chains in workflow, on workflow cancel
|
||||
else:
|
||||
logger.warning(f'Previous task failed, failing task: {task.id} dep: {dep.id} task manager')
|
||||
task.job_explanation = 'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (
|
||||
get_type_for_model(type(dep)),
|
||||
dep.name,
|
||||
dep.id,
|
||||
)
|
||||
task.save(update_fields=['status', 'job_explanation'])
|
||||
task.websocket_emit_status('failed')
|
||||
self.pre_start_failed.append(task.id)
|
||||
@@ -523,19 +536,7 @@ class TaskManager(TaskBase):
|
||||
task.save()
|
||||
task.log_lifecycle("waiting")
|
||||
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
self.control_nodes_to_notify.add(task.get_queue_name())
|
||||
else:
|
||||
# apply_async does a NOTIFY to the channel dispatcher is listening to
|
||||
# postgres will treat this as part of the transaction, which is what we want
|
||||
if task.status != 'failed' and type(task) is not WorkflowJob:
|
||||
task_cls = task._get_task_class()
|
||||
task_cls.apply_async(
|
||||
[task.pk],
|
||||
opts,
|
||||
queue=task.get_queue_name(),
|
||||
uuid=task.celery_task_id,
|
||||
)
|
||||
self.control_nodes_to_notify.add(task.get_queue_name())
|
||||
|
||||
# In exception cases, like a job failing pre-start checks, we send the websocket status message.
|
||||
# For jobs going into waiting, we omit this because of performance issues, as it should go to running quickly
|
||||
@@ -560,8 +561,17 @@ class TaskManager(TaskBase):
|
||||
logger.warning("Task manager has reached time out while processing pending jobs, exiting loop early")
|
||||
break
|
||||
|
||||
has_failed = self.process_job_dep_failures(task)
|
||||
if has_failed:
|
||||
if task.cancel_flag:
|
||||
logger.debug(f"Canceling pending task {task.log_format} because cancel_flag is set")
|
||||
task.status = 'canceled'
|
||||
task.job_explanation = gettext_noop("This job was canceled before it started.")
|
||||
task.save(update_fields=['status', 'job_explanation'])
|
||||
task.websocket_emit_status('canceled')
|
||||
self.pre_start_failed.append(task.id)
|
||||
ScheduleWorkflowManager().schedule()
|
||||
continue
|
||||
|
||||
if self.process_job_dep_failures(task):
|
||||
continue
|
||||
|
||||
blocked_by = self.job_blocked_by(task)
|
||||
@@ -729,7 +739,6 @@ class TaskManager(TaskBase):
|
||||
for workflow_approval in self.get_expired_workflow_approvals():
|
||||
self.timeout_approval_node(workflow_approval)
|
||||
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
for controller_node in self.control_nodes_to_notify:
|
||||
logger.info(f'Notifying node {controller_node} of new waiting jobs.')
|
||||
dispatch_waiting_jobs.apply_async(queue=controller_node)
|
||||
for controller_node in self.control_nodes_to_notify:
|
||||
logger.info(f'Notifying node {controller_node} of new waiting jobs.')
|
||||
dispatch_waiting_jobs.apply_async(queue=controller_node)
|
||||
|
||||
@@ -4,10 +4,12 @@ import logging
|
||||
# Django
|
||||
from django.conf import settings
|
||||
|
||||
# Dispatcherd
|
||||
from dispatcherd.publish import task
|
||||
|
||||
# AWX
|
||||
from awx import MODE
|
||||
from awx.main.scheduler import TaskManager, DependencyManager, WorkflowManager
|
||||
from awx.main.dispatch.publish import task as task_awx
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
|
||||
logger = logging.getLogger('awx.main.scheduler')
|
||||
@@ -20,16 +22,16 @@ def run_manager(manager, prefix):
|
||||
manager().schedule()
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename)
|
||||
@task(queue=get_task_queuename)
|
||||
def task_manager():
|
||||
run_manager(TaskManager, "task")
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename)
|
||||
@task(queue=get_task_queuename)
|
||||
def dependency_manager():
|
||||
run_manager(DependencyManager, "dependency")
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename)
|
||||
@task(queue=get_task_queuename)
|
||||
def workflow_manager():
|
||||
run_manager(WorkflowManager, "workflow")
|
||||
|
||||
@@ -277,7 +277,6 @@ class RunnerCallback:
|
||||
def artifacts_handler(self, artifact_dir):
|
||||
success, query_file_contents = try_load_query_file(artifact_dir)
|
||||
if success:
|
||||
self.delay_update(event_queries_processed=False)
|
||||
collections_info = collect_queries(query_file_contents)
|
||||
for collection, data in collections_info.items():
|
||||
version = data['version']
|
||||
@@ -301,6 +300,24 @@ class RunnerCallback:
|
||||
else:
|
||||
logger.warning(f'The file {COLLECTION_FILENAME} unexpectedly did not contain ansible_version')
|
||||
|
||||
# Write event_queries_processed and installed_collections directly
|
||||
# to the DB instead of using delay_update. delay_update defers
|
||||
# writes until the final job status save, but
|
||||
# events_processed_hook (called from both the task runner after
|
||||
# the final save and the callback receiver after the wrapup
|
||||
# event) needs event_queries_processed=False visible in the DB
|
||||
# to dispatch save_indirect_host_entries. The field defaults to
|
||||
# True, so without a direct write the hook would see True and
|
||||
# skip the dispatch. installed_collections is also written
|
||||
# directly so it is available if the callback receiver
|
||||
# dispatches before the final save.
|
||||
from awx.main.models import Job
|
||||
|
||||
db_updates = {'event_queries_processed': False}
|
||||
if 'installed_collections' in query_file_contents:
|
||||
db_updates['installed_collections'] = query_file_contents['installed_collections']
|
||||
Job.objects.filter(id=self.instance.id).update(**db_updates)
|
||||
|
||||
self.artifacts_processed = True
|
||||
|
||||
|
||||
|
||||
@@ -25,7 +25,8 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
|
||||
log_data = log_data or {}
|
||||
log_data['inventory_id'] = inventory_id
|
||||
log_data['written_ct'] = 0
|
||||
hosts_cached = []
|
||||
# Dict mapping host name -> bool (True if a fact file was written)
|
||||
hosts_cached = {}
|
||||
|
||||
# Create the fact_cache directory inside artifacts_dir
|
||||
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
|
||||
@@ -37,13 +38,14 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
|
||||
last_write_time = None
|
||||
|
||||
for host in hosts:
|
||||
hosts_cached.append(host.name)
|
||||
if not host.ansible_facts_modified or (timeout and host.ansible_facts_modified < now() - datetime.timedelta(seconds=timeout)):
|
||||
hosts_cached[host.name] = False
|
||||
continue # facts are expired - do not write them
|
||||
|
||||
filepath = os.path.join(fact_cache_dir, host.name)
|
||||
if not os.path.realpath(filepath).startswith(fact_cache_dir):
|
||||
logger.error(f'facts for host {smart_str(host.name)} could not be cached')
|
||||
hosts_cached[host.name] = False
|
||||
continue
|
||||
|
||||
try:
|
||||
@@ -51,9 +53,18 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
|
||||
os.chmod(f.name, 0o600)
|
||||
json.dump(host.ansible_facts, f)
|
||||
log_data['written_ct'] += 1
|
||||
last_write_time = os.path.getmtime(filepath)
|
||||
# Backdate the file by 2 seconds so finish_fact_cache can reliably
|
||||
# distinguish these reference files from files updated by ansible.
|
||||
# This guarantees fact file mtime < summary file mtime even with
|
||||
# zipfile's 2-second timestamp rounding during artifact transfer.
|
||||
mtime = os.path.getmtime(filepath)
|
||||
backdated = mtime - 2
|
||||
os.utime(filepath, (backdated, backdated))
|
||||
last_write_time = backdated
|
||||
hosts_cached[host.name] = True
|
||||
except IOError:
|
||||
logger.error(f'facts for host {smart_str(host.name)} could not be cached')
|
||||
hosts_cached[host.name] = False
|
||||
continue
|
||||
|
||||
# Write summary file directly to the artifacts_dir
|
||||
@@ -62,7 +73,6 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
|
||||
summary_data = {
|
||||
'last_write_time': last_write_time,
|
||||
'hosts_cached': hosts_cached,
|
||||
'written_ct': log_data['written_ct'],
|
||||
}
|
||||
with open(summary_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(summary_data, f, indent=2)
|
||||
@@ -74,7 +84,7 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
|
||||
msg='Inventory {inventory_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s',
|
||||
add_log_data=True,
|
||||
)
|
||||
def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=None):
|
||||
def finish_fact_cache(host_qs, artifacts_dir, job_id=None, inventory_id=None, job_created=None, log_data=None):
|
||||
log_data = log_data or {}
|
||||
log_data['inventory_id'] = inventory_id
|
||||
log_data['updated_ct'] = 0
|
||||
@@ -94,8 +104,9 @@ def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=No
|
||||
logger.error(f'Error reading summary file at {summary_path}: {e}')
|
||||
return
|
||||
|
||||
host_names = summary.get('hosts_cached', [])
|
||||
hosts_cached = Host.objects.filter(name__in=host_names).order_by('id').iterator()
|
||||
hosts_cached_map = summary.get('hosts_cached', {})
|
||||
host_names = list(hosts_cached_map.keys())
|
||||
hosts_cached = host_qs.filter(name__in=host_names).order_by('id').iterator()
|
||||
# Path where individual fact files were written
|
||||
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
|
||||
hosts_to_update = []
|
||||
@@ -136,16 +147,35 @@ def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=No
|
||||
else:
|
||||
log_data['unmodified_ct'] += 1
|
||||
else:
|
||||
# File is missing. Only interpret this as "ansible cleared facts" if
|
||||
# start_fact_cache actually wrote a file for this host (i.e. the host
|
||||
# had valid, non-expired facts before the job ran). If no file was
|
||||
# ever written, the missing file is expected and not a clear signal.
|
||||
if not hosts_cached_map.get(host.name):
|
||||
log_data['unmodified_ct'] += 1
|
||||
continue
|
||||
|
||||
# if the file goes missing, ansible removed it (likely via clear_facts)
|
||||
# if the file goes missing, but the host has not started facts, then we should not clear the facts
|
||||
host.ansible_facts = {}
|
||||
host.ansible_facts_modified = now()
|
||||
hosts_to_update.append(host)
|
||||
logger.info(f'Facts cleared for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}')
|
||||
log_data['cleared_ct'] += 1
|
||||
if job_created and host.ansible_facts_modified and host.ansible_facts_modified > job_created:
|
||||
logger.warning(
|
||||
f'Skipping fact clear for host {smart_str(host.name)} in job {job_id} '
|
||||
f'inventory {inventory_id}: host ansible_facts_modified '
|
||||
f'({host.ansible_facts_modified.isoformat()}) is after this job\'s '
|
||||
f'created time ({job_created.isoformat()}). '
|
||||
f'A concurrent job likely updated this host\'s facts while this job was running.'
|
||||
)
|
||||
log_data['unmodified_ct'] += 1
|
||||
else:
|
||||
host.ansible_facts = {}
|
||||
host.ansible_facts_modified = now()
|
||||
hosts_to_update.append(host)
|
||||
logger.info(f'Facts cleared for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}')
|
||||
log_data['cleared_ct'] += 1
|
||||
|
||||
if len(hosts_to_update) >= 100:
|
||||
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])
|
||||
hosts_to_update = []
|
||||
|
||||
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])
|
||||
logger.debug(f'Updated {log_data["updated_ct"]} host facts for inventory {inventory_id} in job {job_id}')
|
||||
|
||||
@@ -12,7 +12,7 @@ from django.db import transaction
|
||||
# Django flags
|
||||
from flags.state import flag_enabled
|
||||
|
||||
from awx.main.dispatch.publish import task
|
||||
from dispatcherd.publish import task
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
from awx.main.models.indirect_managed_node_audit import IndirectManagedNodeAudit
|
||||
from awx.main.models.event_query import EventQuery
|
||||
|
||||
@@ -6,8 +6,8 @@ from django.conf import settings
|
||||
from django.db.models import Count, F
|
||||
from django.db.models.functions import TruncMonth
|
||||
from django.utils.timezone import now
|
||||
from dispatcherd.publish import task
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
from awx.main.dispatch.publish import task as task_awx
|
||||
from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly
|
||||
from awx.main.tasks.helpers import is_run_threshold_reached
|
||||
from awx.conf.license import get_license
|
||||
@@ -17,7 +17,7 @@ from awx.main.utils.db import bulk_update_sorted_by_id
|
||||
logger = logging.getLogger('awx.main.tasks.host_metrics')
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename)
|
||||
@task(queue=get_task_queuename)
|
||||
def cleanup_host_metrics():
|
||||
if is_run_threshold_reached(getattr(settings, 'CLEANUP_HOST_METRICS_LAST_TS', None), getattr(settings, 'CLEANUP_HOST_METRICS_INTERVAL', 30) * 86400):
|
||||
logger.info(f"Executing cleanup_host_metrics, last ran at {getattr(settings, 'CLEANUP_HOST_METRICS_LAST_TS', '---')}")
|
||||
@@ -28,7 +28,7 @@ def cleanup_host_metrics():
|
||||
logger.info("Finished cleanup_host_metrics")
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename)
|
||||
@task(queue=get_task_queuename)
|
||||
def host_metric_summary_monthly():
|
||||
"""Run cleanup host metrics summary monthly task each week"""
|
||||
if is_run_threshold_reached(getattr(settings, 'HOST_METRIC_SUMMARY_TASK_LAST_TS', None), getattr(settings, 'HOST_METRIC_SUMMARY_TASK_INTERVAL', 7) * 86400):
|
||||
|
||||
@@ -17,7 +17,6 @@ import urllib.parse as urlparse
|
||||
|
||||
# Django
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
|
||||
# Shared code for the AWX platform
|
||||
from awx_plugins.interfaces._temporary_private_container_api import CONTAINER_ROOT, get_incontainer_path
|
||||
@@ -36,7 +35,6 @@ from dispatcherd.publish import task
|
||||
from dispatcherd.utils import serialize_task
|
||||
|
||||
# AWX
|
||||
from awx.main.dispatch.publish import task as task_awx
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
from awx.main.constants import (
|
||||
PRIVILEGE_ESCALATION_METHODS,
|
||||
@@ -85,6 +83,7 @@ from awx.main.utils.common import (
|
||||
create_partition,
|
||||
ScheduleWorkflowManager,
|
||||
ScheduleTaskManager,
|
||||
getattr_dne,
|
||||
)
|
||||
from awx.conf.license import get_license
|
||||
from awx.main.utils.handlers import SpecialInventoryHandler
|
||||
@@ -93,9 +92,92 @@ from awx.main.utils.update_model import update_model
|
||||
# Django flags
|
||||
from flags.state import flag_enabled
|
||||
|
||||
# Workload Identity
|
||||
from ansible_base.lib.workload_identity.controller import AutomationControllerJobScope
|
||||
from ansible_base.resource_registry.workload_identity_client import get_workload_identity_client
|
||||
|
||||
logger = logging.getLogger('awx.main.tasks.jobs')
|
||||
|
||||
|
||||
def populate_claims_for_workload(unified_job) -> dict:
    """
    Extract JWT claims from a Controller workload for the aap_controller_automation_job scope.

    The job id, job name and launch type are always included. Every other
    claim is added only when the corresponding related object / attribute is
    present on ``unified_job``, so absent relations produce no claim at all
    rather than a null-valued one.

    Args:
        unified_job: the job to describe.

    Returns:
        dict mapping ``AutomationControllerJobScope`` claim keys to values.
    """
    scope = AutomationControllerJobScope

    claims = {
        scope.CLAIM_JOB_ID: unified_job.id,
        scope.CLAIM_JOB_NAME: unified_job.name,
        scope.CLAIM_LAUNCH_TYPE: unified_job.launch_type,
    }

    # Relations looked up without an explicit default.
    # NOTE(review): presumably getattr_dne yields a falsy value when the
    # related row does not exist - confirm against its definition.
    organization = getattr_dne(unified_job, 'organization')
    if organization:
        claims[scope.CLAIM_ORGANIZATION_NAME] = organization.name
        claims[scope.CLAIM_ORGANIZATION_ID] = organization.id

    ujt = getattr_dne(unified_job, 'unified_job_template')
    if ujt:
        claims[scope.CLAIM_UNIFIED_JOB_TEMPLATE_NAME] = ujt.name
        claims[scope.CLAIM_UNIFIED_JOB_TEMPLATE_ID] = ujt.id

    instance_group = getattr_dne(unified_job, 'instance_group')
    if instance_group:
        claims[scope.CLAIM_INSTANCE_GROUP_NAME] = instance_group.name
        claims[scope.CLAIM_INSTANCE_GROUP_ID] = instance_group.id

    # Relations looked up with an explicit None default; the original comment
    # notes these may not be valid for every type of unified job.
    inventory = getattr_dne(unified_job, 'inventory', None)
    if inventory:
        claims[scope.CLAIM_INVENTORY_NAME] = inventory.name
        claims[scope.CLAIM_INVENTORY_ID] = inventory.id

    execution_environment = getattr_dne(unified_job, 'execution_environment', None)
    if execution_environment:
        claims[scope.CLAIM_EXECUTION_ENVIRONMENT_NAME] = execution_environment.name
        claims[scope.CLAIM_EXECUTION_ENVIRONMENT_ID] = execution_environment.id

    project = getattr_dne(unified_job, 'project', None)
    if project:
        claims[scope.CLAIM_PROJECT_NAME] = project.name
        claims[scope.CLAIM_PROJECT_ID] = project.id

    jt = getattr_dne(unified_job, 'job_template', None)
    if jt:
        claims[scope.CLAIM_JOB_TEMPLATE_NAME] = jt.name
        claims[scope.CLAIM_JOB_TEMPLATE_ID] = jt.id

    # Plain attributes: claimed whenever the attribute exists, whatever its value.
    if hasattr(unified_job, 'playbook'):
        claims[scope.CLAIM_PLAYBOOK_NAME] = unified_job.playbook

    if hasattr(unified_job, 'job_type'):
        claims[scope.CLAIM_JOB_TYPE] = unified_job.job_type

    # launched_by is used as a mapping; each key is optional.
    launcher = unified_job.launched_by
    if 'name' in launcher:
        claims[scope.CLAIM_LAUNCHED_BY_NAME] = launcher['name']
    if 'id' in launcher:
        claims[scope.CLAIM_LAUNCHED_BY_ID] = launcher['id']

    return claims
|
||||
|
||||
|
||||
def retrieve_workload_identity_jwt(
    unified_job: UnifiedJob,
    audience: str,
    scope: str,
    workload_ttl_seconds: int | None = None,
) -> str:
    """Request a workload identity JWT built from the claims of ``unified_job``.

    Args:
        unified_job: job whose claims are embedded in the token.
        audience: audience value forwarded to the token request.
        scope: scope name forwarded to the token request.
        workload_ttl_seconds: optional TTL; only forwarded when truthy, so
            ``None`` and ``0`` both leave the TTL out of the request.

    Returns:
        The ``jwt`` attribute of the client's response.

    Raises:
        RuntimeError: if the workload identity client is not configured.
    """
    identity_client = get_workload_identity_client()
    if identity_client is None:
        raise RuntimeError("Workload identity client is not configured")

    request_kwargs = {
        "claims": populate_claims_for_workload(unified_job),
        "scope": scope,
        "audience": audience,
    }
    if workload_ttl_seconds:
        request_kwargs["workload_ttl_seconds"] = workload_ttl_seconds

    response = identity_client.request_workload_jwt(**request_kwargs)
    return response.jwt
|
||||
|
||||
|
||||
def with_path_cleanup(f):
|
||||
@functools.wraps(f)
|
||||
def _wrapped(self, *args, **kwargs):
|
||||
@@ -122,6 +204,7 @@ def dispatch_waiting_jobs(binder):
|
||||
if not kwargs:
|
||||
kwargs = {}
|
||||
binder.control('run', data={'task': serialize_task(uj._get_task_class()), 'args': [uj.id], 'kwargs': kwargs, 'uuid': uj.celery_task_id})
|
||||
UnifiedJob.objects.filter(pk=uj.pk, status='waiting').update(status='running', start_args='')
|
||||
|
||||
|
||||
class BaseTask(object):
|
||||
@@ -136,6 +219,60 @@ class BaseTask(object):
|
||||
self.update_attempts = int(getattr(settings, 'DISPATCHER_DB_DOWNTOWN_TOLLERANCE', settings.DISPATCHER_DB_DOWNTIME_TOLERANCE) / 5)
|
||||
self.runner_callback = self.callback_class(model=self.model)
|
||||
|
||||
@functools.cached_property
def _credentials(self):
    """
    Credentials used by this task, fetched once per task object.

    The result of build_credentials_list() for self.instance is turned into
    a concrete list so that a lazy iterable (e.g. a QuerySet) is not
    evaluated again on later accesses; cached_property then stores that list
    on the task object.
    """
    return list(self.build_credentials_list(self.instance))
|
||||
|
||||
def populate_workload_identity_tokens(self):
    """
    Populate credentials with workload identity tokens.

    For every input source of every task credential whose source credential
    type declares an internal ``workload_identity_token`` field, a JWT is
    requested and stored in the credential's context, keyed by the input
    source primary key.

    On failure (feature flag disabled, or any exception while obtaining a
    token) the job is saved with status 'error' and an explanation naming
    the offending credential. Processing stops at the first failure: the job
    is not going to start, so further token requests would be wasted and
    would only overwrite the explanation of the original problem.
    """

    def fail(explanation):
        # Persist the failure on the job; callers inspect instance.status afterwards.
        self.instance.job_explanation = explanation
        self.instance.status = 'error'
        self.instance.save()

    # Materialized up front so the flag / timeout lookups below are only
    # performed when at least one input source actually needs a token.
    credential_input_sources = [
        (credential.context, src)
        for credential in self._credentials
        for src in credential.input_sources.all()
        if any(
            field.get('id') == 'workload_identity_token' and field.get('internal')
            for field in src.source_credential.credential_type.inputs.get('fields', [])
        )
    ]
    if not credential_input_sources:
        return

    # The flag does not depend on the credential, so check it once.
    if not flag_enabled("FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED"):
        _, input_src = credential_input_sources[0]
        fail(f'Flag FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED is not enabled, required for credential {input_src.source_credential.name} used in this job.')
        return

    # Likewise the TTL depends only on the job; a falsy timeout means "no TTL".
    workload_ttl = self.get_instance_timeout(self.instance) or None

    for credential_ctx, input_src in credential_input_sources:
        try:
            jwt = retrieve_workload_identity_jwt(
                self.instance,
                audience=input_src.source_credential.get_input('jwt_aud'),
                scope=AutomationControllerJobScope.name,
                workload_ttl_seconds=workload_ttl,
            )
            # Store token keyed by input source PK, since a credential can have
            # multiple input sources (one per field), each potentially with a different audience
            credential_ctx[input_src.pk] = {"workload_identity_token": jwt}
        except Exception as e:
            # Deliberately broad: any problem obtaining the token must fail
            # the job with a readable explanation instead of crashing the task.
            fail(f'Could not generate workload identity token for credential {input_src.source_credential.name} used in this job. Error:\n{e}')
            return
|
||||
|
||||
def update_model(self, pk, _attempt=0, **updates):
    """
    Update (or just reload, when no updates are given) the row ``pk`` of self.model.

    Thin wrapper around the module-level update_model() that supplies this
    task's model and its retry budget (self.update_attempts).

    Args:
        pk: primary key of the row to update.
        _attempt: starting attempt counter, forwarded unchanged. Previously
            this argument was accepted but a hard-coded 0 was passed on.
        **updates: field values forwarded to update_model().

    Returns:
        Whatever the module-level update_model() returns.
    """
    return update_model(self.model, pk, _attempt=_attempt, _max_attempts=self.update_attempts, **updates)
|
||||
|
||||
@@ -287,6 +424,19 @@ class BaseTask(object):
|
||||
private_data_files['credentials'][credential] = self.write_private_data_file(private_data_dir, None, data, sub_dir='env')
|
||||
for credential, data in private_data.get('certificates', {}).items():
|
||||
self.write_private_data_file(private_data_dir, 'ssh_key_data-cert.pub', data, sub_dir=os.path.join('artifacts', str(self.instance.id)))
|
||||
|
||||
# Copy vendor collections to private_data_dir for indirect node counting
|
||||
# This makes external query files available to the callback plugin in EEs
|
||||
if flag_enabled("FEATURE_INDIRECT_NODE_COUNTING_ENABLED"):
|
||||
vendor_src = '/var/lib/awx/vendor_collections'
|
||||
vendor_dest = os.path.join(private_data_dir, 'vendor_collections')
|
||||
if os.path.exists(vendor_src):
|
||||
try:
|
||||
shutil.copytree(vendor_src, vendor_dest)
|
||||
logger.debug(f"Copied vendor collections from {vendor_src} to {vendor_dest}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to copy vendor collections: {e}")
|
||||
|
||||
return private_data_files, ssh_key_data
|
||||
|
||||
def build_passwords(self, instance, runtime_passwords):
|
||||
@@ -360,6 +510,7 @@ class BaseTask(object):
|
||||
return []
|
||||
|
||||
def get_instance_timeout(self, instance):
|
||||
"""Return the effective job timeout in seconds."""
|
||||
global_timeout_setting_name = instance._global_timeout_setting()
|
||||
if global_timeout_setting_name:
|
||||
global_timeout = getattr(settings, global_timeout_setting_name, 0)
|
||||
@@ -468,48 +619,32 @@ class BaseTask(object):
|
||||
def should_use_fact_cache(self):
    """Report whether this task uses the fact cache; this implementation always says no."""
    return False
|
||||
|
||||
def transition_status(self, pk: int) -> bool:
    """Atomically move a waiting job to running (or canceled) under a row lock.

    Returns:
        False when the locked row is already 'running', i.e. another process
        got it first. True in every other case, including when the row was
        neither 'waiting' nor 'running' and was therefore left untouched.
    """
    with transaction.atomic():
        # How the row is fetched, and why:
        #   .values        - avoids loading a full object; doing so is known to
        #                    lead to deadlocks because signals load other related
        #                    rows which another process may be locking
        #   of=('self',)   - keeps FK tables out of the lock list, another way
        #                    deadlocks can happen
        #   .get           - only the single job row is loaded
        locked_row = UnifiedJob.objects.select_for_update(of=('self',)).values('status', 'cancel_flag').get(pk=pk)
        current_status = locked_row['status']

        if current_status == 'running':
            logger.info(f'Job {pk} is being ran by another process, exiting')
            return False

        # Only a status of waiting (read under the lock) gives this process
        # clearance to run the job.
        if current_status == 'waiting':
            next_status = 'canceled' if locked_row['cancel_flag'] else 'running'
            # .filter + .update: a bulk update of just that one row, again
            # without loading the full object or any unintended data.
            UnifiedJob.objects.filter(pk=pk).update(status=next_status, start_args='')

    return True
|
||||
|
||||
@with_path_cleanup
|
||||
@with_signal_handling
|
||||
def run(self, pk, **kwargs):
|
||||
"""
|
||||
Run the job/task and capture its output.
|
||||
"""
|
||||
if not self.instance: # Used to skip fetch for local runs
|
||||
if not self.transition_status(pk):
|
||||
logger.info(f'Job {pk} is being ran by another process, exiting')
|
||||
return
|
||||
|
||||
# Load the instance
|
||||
self.instance = self.update_model(pk)
|
||||
if not self.instance: # Used to skip fetch for local runs
|
||||
# Load the instance
|
||||
self.instance = self.update_model(pk)
|
||||
|
||||
# status should be "running" from dispatch_waiting_jobs,
|
||||
# but may still be "waiting" if the worker picked this up before the status update landed.
|
||||
if self.instance.status == 'waiting':
|
||||
UnifiedJob.objects.filter(pk=pk).update(status="running", start_args='')
|
||||
self.instance.refresh_from_db()
|
||||
|
||||
if self.instance.status != 'running':
|
||||
logger.error(f'Not starting {self.instance.status} task pk={pk} because its status "{self.instance.status}" is not expected')
|
||||
return
|
||||
|
||||
if self.instance.cancel_flag:
|
||||
self.instance = self.update_model(pk, status='canceled')
|
||||
self.instance.websocket_emit_status('canceled')
|
||||
return
|
||||
|
||||
self.instance.websocket_emit_status("running")
|
||||
status, rc = 'error', None
|
||||
self.runner_callback.event_ct = 0
|
||||
@@ -548,6 +683,12 @@ class BaseTask(object):
|
||||
if not os.path.exists(settings.AWX_ISOLATION_BASE_PATH):
|
||||
raise RuntimeError('AWX_ISOLATION_BASE_PATH=%s does not exist' % settings.AWX_ISOLATION_BASE_PATH)
|
||||
|
||||
if flag_enabled("FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED"):
|
||||
logger.info(f'Generating workload identity tokens for {self.instance.log_format}')
|
||||
self.populate_workload_identity_tokens()
|
||||
if self.instance.status == 'error':
|
||||
raise RuntimeError('not starting %s task' % self.instance.status)
|
||||
|
||||
# May have to serialize the value
|
||||
private_data_files, ssh_key_data = self.build_private_data_files(self.instance, private_data_dir)
|
||||
passwords = self.build_passwords(self.instance, kwargs)
|
||||
@@ -565,7 +706,7 @@ class BaseTask(object):
|
||||
|
||||
self.runner_callback.job_created = str(self.instance.created)
|
||||
|
||||
credentials = self.build_credentials_list(self.instance)
|
||||
credentials = self._credentials
|
||||
|
||||
container_root = None
|
||||
if settings.IS_K8S and isinstance(self.instance, ProjectUpdate):
|
||||
@@ -851,7 +992,7 @@ class SourceControlMixin(BaseTask):
|
||||
self.release_lock(project)
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename)
|
||||
@task(queue=get_task_queuename)
|
||||
class RunJob(SourceControlMixin, BaseTask):
|
||||
"""
|
||||
Run a job using ansible-playbook.
|
||||
@@ -860,6 +1001,29 @@ class RunJob(SourceControlMixin, BaseTask):
|
||||
model = Job
|
||||
event_model = JobEvent
|
||||
|
||||
def _extract_credentials_of_kind(self, kind: str):
    """Return a generator over the task credentials whose credential type kind equals ``kind``."""
    task_credentials = self._credentials
    return (candidate for candidate in task_credentials if candidate.credential_type.kind == kind)
|
||||
|
||||
@property
def _machine_credential(self) -> object:
    """First credential of kind 'ssh', or None when the task has none."""
    ssh_matches = self._extract_credentials_of_kind('ssh')
    return next(ssh_matches, None)
|
||||
|
||||
@property
def _vault_credentials(self) -> list[object]:
    """All credentials of kind 'vault', as a new list."""
    return [*self._extract_credentials_of_kind('vault')]
|
||||
|
||||
@property
def _network_credentials(self) -> list[object]:
    """All credentials of kind 'net', as a new list."""
    net_matches = self._extract_credentials_of_kind('net')
    return list(net_matches)
|
||||
|
||||
@property
def _cloud_credentials(self) -> list[object]:
    """All credentials of kind 'cloud', as a new list."""
    cloud_matches = self._extract_credentials_of_kind('cloud')
    return list(cloud_matches)
|
||||
|
||||
def build_private_data(self, job, private_data_dir):
|
||||
"""
|
||||
Returns a dict of the form
|
||||
@@ -877,7 +1041,7 @@ class RunJob(SourceControlMixin, BaseTask):
|
||||
}
|
||||
"""
|
||||
private_data = {'credentials': {}}
|
||||
for credential in job.credentials.prefetch_related('input_sources__source_credential').all():
|
||||
for credential in self._credentials:
|
||||
# If we were sent SSH credentials, decrypt them and send them
|
||||
# back (they will be written to a temporary file).
|
||||
if credential.has_input('ssh_key_data'):
|
||||
@@ -893,14 +1057,14 @@ class RunJob(SourceControlMixin, BaseTask):
|
||||
and ansible-vault.
|
||||
"""
|
||||
passwords = super(RunJob, self).build_passwords(job, runtime_passwords)
|
||||
cred = job.machine_credential
|
||||
cred = self._machine_credential
|
||||
if cred:
|
||||
for field in ('ssh_key_unlock', 'ssh_password', 'become_password', 'vault_password'):
|
||||
value = runtime_passwords.get(field, cred.get_input('password' if field == 'ssh_password' else field, default=''))
|
||||
if value not in ('', 'ASK'):
|
||||
passwords[field] = value
|
||||
|
||||
for cred in job.vault_credentials:
|
||||
for cred in self._vault_credentials:
|
||||
field = 'vault_password'
|
||||
vault_id = cred.get_input('vault_id', default=None)
|
||||
if vault_id:
|
||||
@@ -916,7 +1080,7 @@ class RunJob(SourceControlMixin, BaseTask):
|
||||
key unlock over network key unlock.
|
||||
'''
|
||||
if 'ssh_key_unlock' not in passwords:
|
||||
for cred in job.network_credentials:
|
||||
for cred in self._network_credentials:
|
||||
if cred.inputs.get('ssh_key_unlock'):
|
||||
passwords['ssh_key_unlock'] = runtime_passwords.get('ssh_key_unlock', cred.get_input('ssh_key_unlock', default=''))
|
||||
break
|
||||
@@ -951,11 +1115,11 @@ class RunJob(SourceControlMixin, BaseTask):
|
||||
|
||||
# Set environment variables for cloud credentials.
|
||||
cred_files = private_data_files.get('credentials', {})
|
||||
for cloud_cred in job.cloud_credentials:
|
||||
for cloud_cred in self._cloud_credentials:
|
||||
if cloud_cred and cloud_cred.credential_type.namespace == 'openstack' and cred_files.get(cloud_cred, ''):
|
||||
env['OS_CLIENT_CONFIG_FILE'] = get_incontainer_path(cred_files.get(cloud_cred, ''), private_data_dir)
|
||||
|
||||
for network_cred in job.network_credentials:
|
||||
for network_cred in self._network_credentials:
|
||||
env['ANSIBLE_NET_USERNAME'] = network_cred.get_input('username', default='')
|
||||
env['ANSIBLE_NET_PASSWORD'] = network_cred.get_input('password', default='')
|
||||
|
||||
@@ -998,6 +1162,11 @@ class RunJob(SourceControlMixin, BaseTask):
|
||||
if 'callbacks_enabled' in config_values:
|
||||
env['ANSIBLE_CALLBACKS_ENABLED'] += ':' + config_values['callbacks_enabled']
|
||||
|
||||
# Add vendor collections path for external query file discovery
|
||||
vendor_collections_path = os.path.join(CONTAINER_ROOT, 'vendor_collections')
|
||||
env['ANSIBLE_COLLECTIONS_PATH'] = f"{vendor_collections_path}:{env['ANSIBLE_COLLECTIONS_PATH']}"
|
||||
logger.debug(f"ANSIBLE_COLLECTIONS_PATH updated for vendor collections: {env['ANSIBLE_COLLECTIONS_PATH']}")
|
||||
|
||||
return env
|
||||
|
||||
def build_args(self, job, private_data_dir, passwords):
|
||||
@@ -1005,7 +1174,7 @@ class RunJob(SourceControlMixin, BaseTask):
|
||||
Build command line argument list for running ansible-playbook,
|
||||
optionally using ssh-agent for public/private key authentication.
|
||||
"""
|
||||
creds = job.machine_credential
|
||||
creds = self._machine_credential
|
||||
|
||||
ssh_username, become_username, become_method = '', '', ''
|
||||
if creds:
|
||||
@@ -1157,10 +1326,16 @@ class RunJob(SourceControlMixin, BaseTask):
|
||||
return
|
||||
if self.should_use_fact_cache() and self.runner_callback.artifacts_processed:
|
||||
job.log_lifecycle("finish_job_fact_cache")
|
||||
if job.inventory.kind == 'constructed':
|
||||
hosts_qs = job.get_source_hosts_for_constructed_inventory()
|
||||
else:
|
||||
hosts_qs = job.inventory.hosts
|
||||
finish_fact_cache(
|
||||
hosts_qs,
|
||||
artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(job.id)),
|
||||
job_id=job.id,
|
||||
inventory_id=job.inventory_id,
|
||||
job_created=job.created,
|
||||
)
|
||||
|
||||
def final_run_hook(self, job, status, private_data_dir):
|
||||
@@ -1174,7 +1349,7 @@ class RunJob(SourceControlMixin, BaseTask):
|
||||
update_inventory_computed_fields.delay(inventory.id)
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename)
|
||||
@task(queue=get_task_queuename)
|
||||
class RunProjectUpdate(BaseTask):
|
||||
model = ProjectUpdate
|
||||
event_model = ProjectUpdateEvent
|
||||
@@ -1329,7 +1504,6 @@ class RunProjectUpdate(BaseTask):
|
||||
'local_path': os.path.basename(project_update.project.local_path),
|
||||
'project_path': project_update.get_project_path(check_if_exists=False), # deprecated
|
||||
'insights_url': settings.INSIGHTS_URL_BASE,
|
||||
'oidc_endpoint': settings.INSIGHTS_OIDC_ENDPOINT,
|
||||
'awx_license_type': get_license().get('license_type', 'UNLICENSED'),
|
||||
'awx_version': get_awx_version(),
|
||||
'scm_url': scm_url,
|
||||
@@ -1513,7 +1687,7 @@ class RunProjectUpdate(BaseTask):
|
||||
return []
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename)
|
||||
@task(queue=get_task_queuename)
|
||||
class RunInventoryUpdate(SourceControlMixin, BaseTask):
|
||||
model = InventoryUpdate
|
||||
event_model = InventoryUpdateEvent
|
||||
@@ -1776,7 +1950,7 @@ class RunInventoryUpdate(SourceControlMixin, BaseTask):
|
||||
raise PostRunError('Error occured while saving inventory data, see traceback or server logs', status='error', tb=traceback.format_exc())
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename)
|
||||
@task(queue=get_task_queuename)
|
||||
class RunAdHocCommand(BaseTask):
|
||||
"""
|
||||
Run an ad hoc command using ansible.
|
||||
@@ -1929,7 +2103,7 @@ class RunAdHocCommand(BaseTask):
|
||||
return d
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename)
|
||||
@task(queue=get_task_queuename)
|
||||
class RunSystemJob(BaseTask):
|
||||
model = SystemJob
|
||||
event_model = SystemJobEvent
|
||||
|
||||
@@ -393,9 +393,9 @@ def evaluate_policy(instance):
|
||||
raise PolicyEvaluationError(_('Following certificate settings are missing for OPA_AUTH_TYPE=Certificate: {}').format(cert_settings_missing))
|
||||
|
||||
query_paths = [
|
||||
('Organization', instance.organization.opa_query_path),
|
||||
('Inventory', instance.inventory.opa_query_path),
|
||||
('Job template', instance.job_template.opa_query_path),
|
||||
('Organization', instance.organization.opa_query_path if instance.organization else None),
|
||||
('Inventory', instance.inventory.opa_query_path if instance.inventory else None),
|
||||
('Job template', instance.job_template.opa_query_path if instance.job_template else None),
|
||||
]
|
||||
violations = dict()
|
||||
errors = dict()
|
||||
|
||||
@@ -20,6 +20,9 @@ import ansible_runner
|
||||
# django-ansible-base
|
||||
from ansible_base.lib.utils.db import advisory_lock
|
||||
|
||||
# Dispatcherd
|
||||
from dispatcherd.publish import task
|
||||
|
||||
# AWX
|
||||
from awx.main.utils.execution_environments import get_default_pod_spec
|
||||
from awx.main.exceptions import ReceptorNodeNotFound
|
||||
@@ -32,7 +35,6 @@ from awx.main.constants import MAX_ISOLATED_PATH_COLON_DELIMITER
|
||||
from awx.main.tasks.signals import signal_state, signal_callback, SignalExit
|
||||
from awx.main.models import Instance, InstanceLink, UnifiedJob, ReceptorAddress
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
from awx.main.dispatch.publish import task as task_awx
|
||||
|
||||
# Receptorctl
|
||||
from receptorctl.socket_interface import ReceptorControl
|
||||
@@ -852,7 +854,7 @@ def reload_receptor():
|
||||
raise RuntimeError("Receptor reload failed")
|
||||
|
||||
|
||||
@task_awx(on_duplicate='queue_one')
|
||||
@task(on_duplicate='queue_one')
|
||||
def write_receptor_config():
|
||||
"""
|
||||
This task runs async on each control node, K8S only.
|
||||
@@ -875,7 +877,7 @@ def write_receptor_config():
|
||||
reload_receptor()
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, on_duplicate='discard')
|
||||
@task(queue=get_task_queuename, on_duplicate='discard')
|
||||
def remove_deprovisioned_node(hostname):
|
||||
InstanceLink.objects.filter(source__hostname=hostname).update(link_state=InstanceLink.States.REMOVING)
|
||||
InstanceLink.objects.filter(target__instance__hostname=hostname).update(link_state=InstanceLink.States.REMOVING)
|
||||
|
||||
@@ -69,7 +69,7 @@ def signal_callback():
|
||||
|
||||
def with_signal_handling(f):
|
||||
"""
|
||||
Change signal handling to make signal_callback return True in event of SIGTERM or SIGINT.
|
||||
Change signal handling to make signal_callback return True in event of SIGTERM, SIGINT, or SIGUSR1.
|
||||
"""
|
||||
|
||||
@functools.wraps(f)
|
||||
|
||||
@@ -9,12 +9,12 @@ import shutil
|
||||
import time
|
||||
from collections import namedtuple
|
||||
from contextlib import redirect_stdout
|
||||
from datetime import datetime
|
||||
from packaging.version import Version
|
||||
from io import StringIO
|
||||
|
||||
# dispatcherd
|
||||
from dispatcherd.factories import get_control_from_settings
|
||||
from dispatcherd.publish import task
|
||||
|
||||
# Runner
|
||||
import ansible_runner.cleanup
|
||||
@@ -56,7 +56,6 @@ from awx.main.analytics.subsystem_metrics import DispatcherMetrics
|
||||
from awx.main.constants import ACTIVE_STATES, ERROR_STATES
|
||||
from awx.main.consumers import emit_channel_notification
|
||||
from awx.main.dispatch import get_task_queuename, reaper
|
||||
from awx.main.dispatch.publish import task as task_awx
|
||||
from awx.main.models import (
|
||||
Instance,
|
||||
InstanceGroup,
|
||||
@@ -74,7 +73,6 @@ from awx.main.tasks.host_indirect import save_indirect_host_entries
|
||||
from awx.main.tasks.receptor import administrative_workunit_reaper, get_receptor_ctl, worker_cleanup, worker_info, write_receptor_config
|
||||
from awx.main.utils.common import ignore_inventory_computed_fields, ignore_inventory_group_removal
|
||||
from awx.main.utils.reload import stop_local_services
|
||||
from dispatcherd.publish import task
|
||||
|
||||
logger = logging.getLogger('awx.main.tasks.system')
|
||||
|
||||
@@ -95,7 +93,10 @@ def _run_dispatch_startup_common():
|
||||
|
||||
# TODO: Enable this on VM installs
|
||||
if settings.IS_K8S:
|
||||
write_receptor_config()
|
||||
try:
|
||||
write_receptor_config()
|
||||
except Exception:
|
||||
logger.exception("Failed to write receptor config, skipping.")
|
||||
|
||||
try:
|
||||
convert_jsonfields()
|
||||
@@ -125,20 +126,12 @@ def _run_dispatch_startup_common():
|
||||
# no-op.
|
||||
#
|
||||
apply_cluster_membership_policies()
|
||||
cluster_node_heartbeat()
|
||||
cluster_node_heartbeat(None)
|
||||
reaper.startup_reaping()
|
||||
m = DispatcherMetrics()
|
||||
m.reset_values()
|
||||
|
||||
|
||||
def _legacy_dispatch_startup():
|
||||
"""
|
||||
Legacy branch for startup: simply performs reaping of waiting jobs with a zero grace period.
|
||||
"""
|
||||
logger.debug("Legacy dispatcher: calling reaper.reap_waiting with grace_period=0")
|
||||
reaper.reap_waiting(grace_period=0)
|
||||
|
||||
|
||||
def _dispatcherd_dispatch_startup():
|
||||
"""
|
||||
New dispatcherd branch for startup: uses the control API to re-submit waiting jobs.
|
||||
@@ -153,21 +146,16 @@ def dispatch_startup():
|
||||
"""
|
||||
System initialization at startup.
|
||||
First, execute the common logic.
|
||||
Then, if FEATURE_DISPATCHERD_ENABLED is enabled, re-submit waiting jobs via the control API;
|
||||
otherwise, fall back to legacy reaping of waiting jobs.
|
||||
Then, re-submit waiting jobs via the control API.
|
||||
"""
|
||||
_run_dispatch_startup_common()
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
_dispatcherd_dispatch_startup()
|
||||
else:
|
||||
_legacy_dispatch_startup()
|
||||
_dispatcherd_dispatch_startup()
|
||||
|
||||
|
||||
def inform_cluster_of_shutdown():
|
||||
"""
|
||||
Clean system shutdown that marks the current instance offline.
|
||||
In legacy mode, it also reaps waiting jobs.
|
||||
In dispatcherd mode, it relies on dispatcherd's built-in cleanup.
|
||||
Relies on dispatcherd's built-in cleanup.
|
||||
"""
|
||||
try:
|
||||
inst = Instance.objects.get(hostname=settings.CLUSTER_HOST_ID)
|
||||
@@ -176,18 +164,11 @@ def inform_cluster_of_shutdown():
|
||||
logger.exception("Cluster host not found: %s", settings.CLUSTER_HOST_ID)
|
||||
return
|
||||
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
logger.debug("Dispatcherd mode: no extra reaping required for instance %s", inst.hostname)
|
||||
else:
|
||||
try:
|
||||
logger.debug("Legacy mode: reaping waiting jobs for instance %s", inst.hostname)
|
||||
reaper.reap_waiting(inst, grace_period=0)
|
||||
except Exception:
|
||||
logger.exception("Failed to reap waiting jobs for %s", inst.hostname)
|
||||
logger.debug("No extra reaping required for instance %s", inst.hostname)
|
||||
logger.warning("Normal shutdown processed for instance %s; instance removed from capacity pool.", inst.hostname)
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=3600 * 5)
|
||||
@task(queue=get_task_queuename, timeout=3600 * 5)
|
||||
def migrate_jsonfield(table, pkfield, columns):
|
||||
batchsize = 10000
|
||||
with advisory_lock(f'json_migration_{table}', wait=False) as acquired:
|
||||
@@ -233,7 +214,7 @@ def migrate_jsonfield(table, pkfield, columns):
|
||||
logger.warning(f"Migration of {table} to jsonb is finished.")
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=3600, on_duplicate='queue_one')
|
||||
@task(queue=get_task_queuename, timeout=3600, on_duplicate='queue_one')
|
||||
def apply_cluster_membership_policies():
|
||||
from awx.main.signals import disable_activity_stream
|
||||
|
||||
@@ -345,7 +326,7 @@ def apply_cluster_membership_policies():
|
||||
logger.debug('Cluster policy computation finished in {} seconds'.format(time.time() - started_compute))
|
||||
|
||||
|
||||
@task_awx(queue='tower_settings_change', timeout=600)
|
||||
@task(queue='tower_settings_change', timeout=600)
|
||||
def clear_setting_cache(setting_keys):
|
||||
# log that cache is being cleared
|
||||
logger.info(f"clear_setting_cache of keys {setting_keys}")
|
||||
@@ -363,7 +344,7 @@ def clear_setting_cache(setting_keys):
|
||||
ctl.control('set_log_level', data={'level': settings.LOG_AGGREGATOR_LEVEL})
|
||||
|
||||
|
||||
@task_awx(queue='tower_broadcast_all', timeout=600)
|
||||
@task(queue='tower_broadcast_all', timeout=600)
|
||||
def delete_project_files(project_path):
|
||||
# TODO: possibly implement some retry logic
|
||||
lock_file = project_path + '.lock'
|
||||
@@ -381,7 +362,7 @@ def delete_project_files(project_path):
|
||||
logger.exception('Could not remove lock file {}'.format(lock_file))
|
||||
|
||||
|
||||
@task_awx(queue='tower_broadcast_all')
|
||||
@task(queue='tower_broadcast_all')
|
||||
def profile_sql(threshold=1, minutes=1):
|
||||
if threshold <= 0:
|
||||
cache.delete('awx-profile-sql-threshold')
|
||||
@@ -391,7 +372,7 @@ def profile_sql(threshold=1, minutes=1):
|
||||
logger.error('SQL QUERIES >={}s ENABLED FOR {} MINUTE(S)'.format(threshold, minutes))
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=1800)
|
||||
@task(queue=get_task_queuename, timeout=1800)
|
||||
def send_notifications(notification_list, job_id=None):
|
||||
if not isinstance(notification_list, list):
|
||||
raise TypeError("notification_list should be of type list")
|
||||
@@ -436,13 +417,13 @@ def events_processed_hook(unified_job):
|
||||
save_indirect_host_entries.delay(unified_job.id)
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=3600 * 5, on_duplicate='discard')
|
||||
@task(queue=get_task_queuename, timeout=3600 * 5, on_duplicate='discard')
|
||||
def gather_analytics():
|
||||
if is_run_threshold_reached(getattr(settings, 'AUTOMATION_ANALYTICS_LAST_GATHER', None), settings.AUTOMATION_ANALYTICS_GATHER_INTERVAL):
|
||||
analytics.gather()
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=600, on_duplicate='queue_one')
|
||||
@task(queue=get_task_queuename, timeout=600, on_duplicate='queue_one')
|
||||
def purge_old_stdout_files():
|
||||
nowtime = time.time()
|
||||
for f in os.listdir(settings.JOBOUTPUT_ROOT):
|
||||
@@ -504,18 +485,18 @@ class CleanupImagesAndFiles:
|
||||
cls.run_remote(this_inst, **kwargs)
|
||||
|
||||
|
||||
@task_awx(queue='tower_broadcast_all', timeout=3600)
|
||||
@task(queue='tower_broadcast_all', timeout=3600)
|
||||
def handle_removed_image(remove_images=None):
|
||||
"""Special broadcast invocation of this method to handle case of deleted EE"""
|
||||
CleanupImagesAndFiles.run(remove_images=remove_images, file_pattern='')
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=3600, on_duplicate='queue_one')
|
||||
@task(queue=get_task_queuename, timeout=3600, on_duplicate='queue_one')
|
||||
def cleanup_images_and_files():
|
||||
CleanupImagesAndFiles.run(image_prune=True)
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=600, on_duplicate='queue_one')
|
||||
@task(queue=get_task_queuename, timeout=600, on_duplicate='queue_one')
|
||||
def execution_node_health_check(node):
|
||||
if node == '':
|
||||
logger.warning('Remote health check incorrectly called with blank string')
|
||||
@@ -640,44 +621,13 @@ def inspect_execution_and_hop_nodes(instance_list):
|
||||
execution_node_health_check.apply_async([hostname])
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, bind_kwargs=['dispatch_time', 'worker_tasks'])
|
||||
def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
|
||||
"""
|
||||
Original implementation for AWX dispatcher.
|
||||
Uses worker_tasks from bind_kwargs to track running tasks.
|
||||
"""
|
||||
# Run common instance management logic
|
||||
this_inst, instance_list, lost_instances = _heartbeat_instance_management()
|
||||
if this_inst is None:
|
||||
return # Early return case from instance management
|
||||
|
||||
# Check versions
|
||||
_heartbeat_check_versions(this_inst, instance_list)
|
||||
|
||||
# Handle lost instances
|
||||
_heartbeat_handle_lost_instances(lost_instances, this_inst)
|
||||
|
||||
# Run local reaper - original implementation using worker_tasks
|
||||
if worker_tasks is not None:
|
||||
active_task_ids = []
|
||||
for task_list in worker_tasks.values():
|
||||
active_task_ids.extend(task_list)
|
||||
|
||||
# Convert dispatch_time to datetime
|
||||
ref_time = datetime.fromisoformat(dispatch_time) if dispatch_time else now()
|
||||
|
||||
reaper.reap(instance=this_inst, excluded_uuids=active_task_ids, ref_time=ref_time)
|
||||
|
||||
if max(len(task_list) for task_list in worker_tasks.values()) <= 1:
|
||||
reaper.reap_waiting(instance=this_inst, excluded_uuids=active_task_ids, ref_time=ref_time)
|
||||
|
||||
|
||||
@task(queue=get_task_queuename, bind=True)
|
||||
def adispatch_cluster_node_heartbeat(binder):
|
||||
def cluster_node_heartbeat(binder):
|
||||
"""
|
||||
Dispatcherd implementation.
|
||||
Uses Control API to get running tasks.
|
||||
"""
|
||||
|
||||
# Run common instance management logic
|
||||
this_inst, instance_list, lost_instances = _heartbeat_instance_management()
|
||||
if this_inst is None:
|
||||
@@ -690,6 +640,9 @@ def adispatch_cluster_node_heartbeat(binder):
|
||||
_heartbeat_handle_lost_instances(lost_instances, this_inst)
|
||||
|
||||
# Get running tasks using dispatcherd API
|
||||
if binder is None:
|
||||
logger.debug("Heartbeat finished in startup.")
|
||||
return
|
||||
active_task_ids = _get_active_task_ids_from_dispatcherd(binder)
|
||||
if active_task_ids is None:
|
||||
logger.warning("No active task IDs retrieved from dispatcherd, skipping reaper")
|
||||
@@ -807,14 +760,16 @@ def _heartbeat_check_versions(this_inst, instance_list):
|
||||
|
||||
|
||||
def _heartbeat_handle_lost_instances(lost_instances, this_inst):
|
||||
"""Handle lost instances by reaping their jobs and marking them offline."""
|
||||
"""Handle lost instances by reaping their running jobs and marking them offline."""
|
||||
for other_inst in lost_instances:
|
||||
try:
|
||||
# Any jobs marked as running will be marked as error
|
||||
explanation = "Job reaped due to instance shutdown"
|
||||
reaper.reap(other_inst, job_explanation=explanation)
|
||||
reaper.reap_waiting(other_inst, grace_period=0, job_explanation=explanation)
|
||||
# Any jobs that were waiting to be processed by this node will be handed back to task manager
|
||||
UnifiedJob.objects.filter(status='waiting', controller_node=other_inst.hostname).update(status='pending', controller_node='', execution_node='')
|
||||
except Exception:
|
||||
logger.exception('failed to reap jobs for {}'.format(other_inst.hostname))
|
||||
logger.exception('failed to re-process jobs for lost instance {}'.format(other_inst.hostname))
|
||||
try:
|
||||
if settings.AWX_AUTO_DEPROVISION_INSTANCES and other_inst.node_type == "control":
|
||||
deprovision_hostname = other_inst.hostname
|
||||
@@ -839,7 +794,7 @@ def _heartbeat_handle_lost_instances(lost_instances, this_inst):
|
||||
logger.exception('No SQL state available. Error marking {} as lost'.format(other_inst.hostname))
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=1800, on_duplicate='queue_one')
|
||||
@task(queue=get_task_queuename, timeout=1800, on_duplicate='queue_one')
|
||||
def awx_receptor_workunit_reaper():
|
||||
"""
|
||||
When an AWX job is launched via receptor, files such as status, stdin, and stdout are created
|
||||
@@ -885,7 +840,7 @@ def awx_receptor_workunit_reaper():
|
||||
administrative_workunit_reaper(receptor_work_list)
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=1800, on_duplicate='queue_one')
|
||||
@task(queue=get_task_queuename, timeout=1800, on_duplicate='queue_one')
|
||||
def awx_k8s_reaper():
|
||||
if not settings.RECEPTOR_RELEASE_WORK:
|
||||
return
|
||||
@@ -908,7 +863,7 @@ def awx_k8s_reaper():
|
||||
logger.exception("Failed to delete orphaned pod {} from {}".format(job.log_format, group))
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=3600 * 5, on_duplicate='discard')
|
||||
@task(queue=get_task_queuename, timeout=3600 * 5, on_duplicate='discard')
|
||||
def awx_periodic_scheduler():
|
||||
lock_session_timeout_milliseconds = settings.TASK_MANAGER_LOCK_TIMEOUT * 1000
|
||||
with advisory_lock('awx_periodic_scheduler_lock', lock_session_timeout_milliseconds=lock_session_timeout_milliseconds, wait=False) as acquired:
|
||||
@@ -965,7 +920,7 @@ def awx_periodic_scheduler():
|
||||
emit_channel_notification('schedules-changed', dict(id=schedule.id, group_name="schedules"))
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=3600)
|
||||
@task(queue=get_task_queuename, timeout=3600)
|
||||
def handle_failure_notifications(task_ids):
|
||||
"""A task-ified version of the method that sends notifications."""
|
||||
found_task_ids = set()
|
||||
@@ -980,7 +935,7 @@ def handle_failure_notifications(task_ids):
|
||||
logger.warning(f'Could not send notifications for {deleted_tasks} because they were not found in the database')
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=3600 * 5)
|
||||
@task(queue=get_task_queuename, timeout=3600 * 5)
|
||||
def update_inventory_computed_fields(inventory_id):
|
||||
"""
|
||||
Signal handler and wrapper around inventory.update_computed_fields to
|
||||
@@ -1030,7 +985,7 @@ def update_smart_memberships_for_inventory(smart_inventory):
|
||||
return False
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=3600, on_duplicate='queue_one')
|
||||
@task(queue=get_task_queuename, timeout=3600, on_duplicate='queue_one')
|
||||
def update_host_smart_inventory_memberships():
|
||||
smart_inventories = Inventory.objects.filter(kind='smart', host_filter__isnull=False, pending_deletion=False)
|
||||
changed_inventories = set([])
|
||||
@@ -1046,7 +1001,7 @@ def update_host_smart_inventory_memberships():
|
||||
smart_inventory.update_computed_fields()
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=3600 * 5)
|
||||
@task(queue=get_task_queuename, timeout=3600 * 5)
|
||||
def delete_inventory(inventory_id, user_id, retries=5):
|
||||
# Delete inventory as user
|
||||
if user_id is None:
|
||||
@@ -1108,7 +1063,7 @@ def _reconstruct_relationships(copy_mapping):
|
||||
new_obj.save()
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=600)
|
||||
@task(queue=get_task_queuename, timeout=600)
|
||||
def deep_copy_model_obj(model_module, model_name, obj_pk, new_obj_pk, user_pk, permission_check_func=None):
|
||||
logger.debug('Deep copy {} from {} to {}.'.format(model_name, obj_pk, new_obj_pk))
|
||||
|
||||
@@ -1163,7 +1118,7 @@ def deep_copy_model_obj(model_module, model_name, obj_pk, new_obj_pk, user_pk, p
|
||||
update_inventory_computed_fields.delay(new_obj.id)
|
||||
|
||||
|
||||
@task_awx(queue=get_task_queuename, timeout=3600, on_duplicate='discard')
|
||||
@task(queue=get_task_queuename, timeout=3600, on_duplicate='discard')
|
||||
def periodic_resource_sync():
|
||||
if not getattr(settings, 'RESOURCE_SERVER', None):
|
||||
logger.debug("Skipping periodic resource_sync, RESOURCE_SERVER not configured")
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
---
|
||||
authors:
|
||||
- AWX Project Contributors <awx-project@googlegroups.com>
|
||||
dependencies: {}
|
||||
description: External query testing collection. No embedded query file. Not for use in production.
|
||||
documentation: https://github.com/ansible/awx
|
||||
homepage: https://github.com/ansible/awx
|
||||
issues: https://github.com/ansible/awx
|
||||
license:
|
||||
- GPL-3.0-or-later
|
||||
name: external
|
||||
namespace: demo
|
||||
readme: README.md
|
||||
repository: https://github.com/ansible/awx
|
||||
tags:
|
||||
- demo
|
||||
- testing
|
||||
- external_query
|
||||
version: 1.0.0
|
||||
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Same licensing as AWX
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
__metaclass__ = type
|
||||
|
||||
DOCUMENTATION = r'''
|
||||
---
|
||||
module: example
|
||||
|
||||
short_description: Module for specific live tests
|
||||
|
||||
version_added: "2.0.0"
|
||||
|
||||
description: This module is part of a test collection in local source. Used for external query testing.
|
||||
|
||||
options:
|
||||
host_name:
|
||||
description: Name to return as the host name.
|
||||
required: false
|
||||
type: str
|
||||
|
||||
author:
|
||||
- AWX Live Tests
|
||||
'''
|
||||
|
||||
EXAMPLES = r'''
|
||||
- name: Test with defaults
|
||||
demo.external.example:
|
||||
|
||||
- name: Test with custom host name
|
||||
demo.external.example:
|
||||
host_name: foo_host
|
||||
'''
|
||||
|
||||
RETURN = r'''
|
||||
direct_host_name:
|
||||
description: The name of the host, this will be collected with the feature.
|
||||
type: str
|
||||
returned: always
|
||||
sample: 'foo_host'
|
||||
'''
|
||||
|
||||
from ansible.module_utils.basic import AnsibleModule
|
||||
|
||||
|
||||
def run_module():
    """Run the ``demo.external.example`` test module.

    Accepts one optional parameter, ``host_name`` (default
    ``foo_host_default``), and echoes it back under several result keys.
    The module never reports a change.

    In check mode the module exits right away with only the base result
    (``changed`` and ``other_data``); the host-name keys are not added.
    """
    module_args = dict(
        host_name=dict(type='str', required=False, default='foo_host_default'),
    )

    # Base result, returned as-is in check mode.
    result = dict(
        changed=False,
        other_data='sample_string',
    )

    module = AnsibleModule(argument_spec=module_args, supports_check_mode=True)

    if module.check_mode:
        # exit_json is expected to end the module run here.
        module.exit_json(**result)

    # The same host name is exposed both at the top level and nested.
    result['direct_host_name'] = module.params['host_name']
    result['nested_host_name'] = {'host_name': module.params['host_name']}
    result['name'] = 'vm-foo'

    # non-canonical facts
    result['device_type'] = 'Fake Host'

    module.exit_json(**result)
|
||||
|
||||
|
||||
def main():
    """Script entry point: delegate to ``run_module``."""
    run_module()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,19 @@
|
||||
---
|
||||
authors:
|
||||
- AWX Project Contributors <awx-project@googlegroups.com>
|
||||
dependencies: {}
|
||||
description: External query testing collection v1.5.0. No embedded query file. Not for use in production.
|
||||
documentation: https://github.com/ansible/awx
|
||||
homepage: https://github.com/ansible/awx
|
||||
issues: https://github.com/ansible/awx
|
||||
license:
|
||||
- GPL-3.0-or-later
|
||||
name: external
|
||||
namespace: demo
|
||||
readme: README.md
|
||||
repository: https://github.com/ansible/awx
|
||||
tags:
|
||||
- demo
|
||||
- testing
|
||||
- external_query
|
||||
version: 1.5.0
|
||||
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Same licensing as AWX
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
__metaclass__ = type
|
||||
|
||||
DOCUMENTATION = r'''
|
||||
---
|
||||
module: example
|
||||
|
||||
short_description: Module for specific live tests
|
||||
|
||||
version_added: "2.0.0"
|
||||
|
||||
description: This module is part of a test collection in local source. Used for external query testing.
|
||||
|
||||
options:
|
||||
host_name:
|
||||
description: Name to return as the host name.
|
||||
required: false
|
||||
type: str
|
||||
|
||||
author:
|
||||
- AWX Live Tests
|
||||
'''
|
||||
|
||||
EXAMPLES = r'''
|
||||
- name: Test with defaults
|
||||
demo.external.example:
|
||||
|
||||
- name: Test with custom host name
|
||||
demo.external.example:
|
||||
host_name: foo_host
|
||||
'''
|
||||
|
||||
RETURN = r'''
|
||||
direct_host_name:
|
||||
description: The name of the host, this will be collected with the feature.
|
||||
type: str
|
||||
returned: always
|
||||
sample: 'foo_host'
|
||||
'''
|
||||
|
||||
from ansible.module_utils.basic import AnsibleModule
|
||||
|
||||
|
||||
def run_module():
    """Run the ``demo.external.example`` test module.

    Accepts one optional parameter, ``host_name`` (default
    ``foo_host_default``), and echoes it back under several result keys.
    The module never reports a change.

    In check mode the module exits right away with only the base result
    (``changed`` and ``other_data``); the host-name keys are not added.
    """
    module_args = dict(
        host_name=dict(type='str', required=False, default='foo_host_default'),
    )

    # Base result, returned as-is in check mode.
    result = dict(
        changed=False,
        other_data='sample_string',
    )

    module = AnsibleModule(argument_spec=module_args, supports_check_mode=True)

    if module.check_mode:
        # exit_json is expected to end the module run here.
        module.exit_json(**result)

    # The same host name is exposed both at the top level and nested.
    result['direct_host_name'] = module.params['host_name']
    result['nested_host_name'] = {'host_name': module.params['host_name']}
    result['name'] = 'vm-foo'

    # non-canonical facts
    result['device_type'] = 'Fake Host'

    module.exit_json(**result)
|
||||
|
||||
|
||||
def main():
    """Script entry point: delegate to ``run_module``."""
    run_module()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,19 @@
|
||||
---
|
||||
authors:
|
||||
- AWX Project Contributors <awx-project@googlegroups.com>
|
||||
dependencies: {}
|
||||
description: External query testing collection v3.0.0. No embedded query file. Not for use in production.
|
||||
documentation: https://github.com/ansible/awx
|
||||
homepage: https://github.com/ansible/awx
|
||||
issues: https://github.com/ansible/awx
|
||||
license:
|
||||
- GPL-3.0-or-later
|
||||
name: external
|
||||
namespace: demo
|
||||
readme: README.md
|
||||
repository: https://github.com/ansible/awx
|
||||
tags:
|
||||
- demo
|
||||
- testing
|
||||
- external_query
|
||||
version: 3.0.0
|
||||
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Same licensing as AWX
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
__metaclass__ = type
|
||||
|
||||
DOCUMENTATION = r'''
|
||||
---
|
||||
module: example
|
||||
|
||||
short_description: Module for specific live tests
|
||||
|
||||
version_added: "2.0.0"
|
||||
|
||||
description: This module is part of a test collection in local source. Used for external query testing.
|
||||
|
||||
options:
|
||||
host_name:
|
||||
description: Name to return as the host name.
|
||||
required: false
|
||||
type: str
|
||||
|
||||
author:
|
||||
- AWX Live Tests
|
||||
'''
|
||||
|
||||
EXAMPLES = r'''
|
||||
- name: Test with defaults
|
||||
demo.external.example:
|
||||
|
||||
- name: Test with custom host name
|
||||
demo.external.example:
|
||||
host_name: foo_host
|
||||
'''
|
||||
|
||||
RETURN = r'''
|
||||
direct_host_name:
|
||||
description: The name of the host, this will be collected with the feature.
|
||||
type: str
|
||||
returned: always
|
||||
sample: 'foo_host'
|
||||
'''
|
||||
|
||||
from ansible.module_utils.basic import AnsibleModule
|
||||
|
||||
|
||||
def run_module():
    """Run the ``demo.external.example`` test module.

    Accepts one optional parameter, ``host_name`` (default
    ``foo_host_default``), and echoes it back under several result keys.
    The module never reports a change.

    In check mode the module exits right away with only the base result
    (``changed`` and ``other_data``); the host-name keys are not added.
    """
    module_args = dict(
        host_name=dict(type='str', required=False, default='foo_host_default'),
    )

    # Base result, returned as-is in check mode.
    result = dict(
        changed=False,
        other_data='sample_string',
    )

    module = AnsibleModule(argument_spec=module_args, supports_check_mode=True)

    if module.check_mode:
        # exit_json is expected to end the module run here.
        module.exit_json(**result)

    # The same host name is exposed both at the top level and nested.
    result['direct_host_name'] = module.params['host_name']
    result['nested_host_name'] = {'host_name': module.params['host_name']}
    result['name'] = 'vm-foo'

    # non-canonical facts
    result['device_type'] = 'Fake Host'

    module.exit_json(**result)
|
||||
|
||||
|
||||
def main():
    """Script entry point: delegate to ``run_module``."""
    run_module()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
21
awx/main/tests/data/projects/facts/gather_slow.yml
Normal file
21
awx/main/tests/data/projects/facts/gather_slow.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
---
|
||||
# Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
|
||||
- hosts: all
|
||||
vars:
|
||||
extra_value: ""
|
||||
gather_facts: false
|
||||
connection: local
|
||||
tasks:
|
||||
- name: set a custom fact
|
||||
set_fact:
|
||||
foo: "bar{{ extra_value }}"
|
||||
bar:
|
||||
a:
|
||||
b:
|
||||
- "c"
|
||||
- "d"
|
||||
cacheable: true
|
||||
- name: sleep to create overlap window for concurrent job testing
|
||||
wait_for:
|
||||
timeout: 2
|
||||
@@ -0,0 +1,5 @@
|
||||
---
|
||||
collections:
|
||||
- name: 'file:///tmp/live_tests/host_query_external_v1_0_0'
|
||||
type: git
|
||||
version: devel
|
||||
@@ -0,0 +1,8 @@
|
||||
---
|
||||
- hosts: all
|
||||
gather_facts: false
|
||||
connection: local
|
||||
tasks:
|
||||
- demo.external.example:
|
||||
register: result
|
||||
- debug: var=result
|
||||
@@ -0,0 +1,5 @@
|
||||
---
|
||||
collections:
|
||||
- name: 'file:///tmp/live_tests/host_query_external_v1_5_0'
|
||||
type: git
|
||||
version: devel
|
||||
@@ -0,0 +1,8 @@
|
||||
---
|
||||
- hosts: all
|
||||
gather_facts: false
|
||||
connection: local
|
||||
tasks:
|
||||
- demo.external.example:
|
||||
register: result
|
||||
- debug: var=result
|
||||
@@ -0,0 +1,5 @@
|
||||
---
|
||||
collections:
|
||||
- name: 'file:///tmp/live_tests/host_query_external_v3_0_0'
|
||||
type: git
|
||||
version: devel
|
||||
@@ -0,0 +1,8 @@
|
||||
---
|
||||
- hosts: all
|
||||
gather_facts: false
|
||||
connection: local
|
||||
tasks:
|
||||
- demo.external.example:
|
||||
register: result
|
||||
- debug: var=result
|
||||
@@ -6,14 +6,13 @@ from dispatcherd.publish import task
|
||||
from django.db import connection
|
||||
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
from awx.main.dispatch.publish import task as old_task
|
||||
|
||||
from ansible_base.lib.utils.db import advisory_lock
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@old_task(queue=get_task_queuename)
|
||||
@task(queue=get_task_queuename)
|
||||
def sleep_task(seconds=10, log=False):
|
||||
if log:
|
||||
logger.info('starting sleep_task')
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
import pytest
|
||||
|
||||
from django.test import RequestFactory
|
||||
from prometheus_client.parser import text_string_to_metric_families
|
||||
from rest_framework.request import Request
|
||||
from awx.main import models
|
||||
from awx.main.analytics.metrics import metrics
|
||||
from awx.main.analytics.dispatcherd_metrics import get_dispatcherd_metrics
|
||||
from awx.api.versioning import reverse
|
||||
|
||||
EXPECTED_VALUES = {
|
||||
@@ -77,3 +80,55 @@ def test_metrics_http_methods(get, post, patch, put, options, admin):
|
||||
assert patch(get_metrics_view_db_only(), user=admin).status_code == 405
|
||||
assert post(get_metrics_view_db_only(), user=admin).status_code == 405
|
||||
assert options(get_metrics_view_db_only(), user=admin).status_code == 200
|
||||
|
||||
|
||||
class DummyMetricsResponse:
    """Minimal stand-in for the object a patched ``urlopen`` returns.

    Provides ``read()`` and the context-manager protocol, so it can be used
    as ``with urlopen(...) as resp: resp.read()``.
    """

    def __init__(self, payload):
        # Stored untouched and handed back verbatim by read().
        self._payload = payload

    def read(self):
        """Return the payload given at construction."""
        return self._payload

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # False means exceptions raised inside the with-block are not suppressed.
        return False
|
||||
|
||||
|
||||
def test_dispatcherd_metrics_node_filter_match(mocker, settings):
    """A ``node`` filter equal to CLUSTER_HOST_ID yields the fetched payload, decoded as UTF-8."""
    settings.CLUSTER_HOST_ID = "awx-1"
    payload = b'# HELP test_metric A test metric\n# TYPE test_metric gauge\ntest_metric 1\n'

    def fake_urlopen(url, timeout=1.0):
        # Serve the canned payload instead of performing a real HTTP request.
        return DummyMetricsResponse(payload)

    mocker.patch('urllib.request.urlopen', fake_urlopen)

    request = Request(RequestFactory().get('/api/v2/metrics/', {'node': 'awx-1'}))

    assert get_dispatcherd_metrics(request) == payload.decode('utf-8')
|
||||
|
||||
|
||||
def test_dispatcherd_metrics_node_filter_excludes_local(mocker, settings):
|
||||
settings.CLUSTER_HOST_ID = "awx-1"
|
||||
|
||||
def fake_urlopen(*args, **kwargs):
|
||||
raise AssertionError("urlopen should not be called when node filter excludes local node")
|
||||
|
||||
mocker.patch('urllib.request.urlopen', fake_urlopen)
|
||||
|
||||
request = Request(RequestFactory().get('/api/v2/metrics/', {'node': 'awx-2'}))
|
||||
|
||||
assert get_dispatcherd_metrics(request) == ''
|
||||
|
||||
|
||||
def test_dispatcherd_metrics_metric_filter_excludes_unrelated(mocker):
|
||||
def fake_urlopen(*args, **kwargs):
|
||||
raise AssertionError("urlopen should not be called when metric filter excludes dispatcherd metrics")
|
||||
|
||||
mocker.patch('urllib.request.urlopen', fake_urlopen)
|
||||
|
||||
request = Request(RequestFactory().get('/api/v2/metrics/', {'metric': 'awx_system_info'}))
|
||||
|
||||
assert get_dispatcherd_metrics(request) == ''
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import pytest
|
||||
|
||||
from ansible_base.lib.testing.util import feature_flag_enabled, feature_flag_disabled
|
||||
|
||||
from awx.main.models import CredentialInputSource
|
||||
from awx.api.versioning import reverse
|
||||
|
||||
@@ -316,3 +318,60 @@ def test_create_credential_input_source_with_already_used_input_returns_400(post
|
||||
]
|
||||
all_responses = [post(list_url, params, admin) for params in all_params]
|
||||
assert all_responses.pop().status_code == 400
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_credential_input_source_passes_workload_identity_token_when_flag_enabled(vault_credential, external_credential, mocker):
|
||||
"""Test that workload_identity_token is passed to backend when flag is enabled."""
|
||||
with feature_flag_enabled('FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED'):
|
||||
# Add workload_identity_token as an internal field on the external credential type
|
||||
# so get_input_value resolves it from the per-input-source context
|
||||
external_credential.credential_type.inputs['fields'].append(
|
||||
{'id': 'workload_identity_token', 'label': 'Workload Identity Token', 'type': 'string', 'internal': True}
|
||||
)
|
||||
|
||||
# Create an input source
|
||||
input_source = CredentialInputSource.objects.create(
|
||||
target_credential=vault_credential,
|
||||
source_credential=external_credential,
|
||||
input_field_name='vault_password',
|
||||
metadata={'key': 'test_key'},
|
||||
)
|
||||
|
||||
# Mock the credential plugin backend
|
||||
mock_backend = mocker.patch.object(external_credential.credential_type.plugin, 'backend', autospec=True, return_value='test_value')
|
||||
|
||||
# Call with context keyed by input source PK
|
||||
test_context = {input_source.pk: {'workload_identity_token': 'jwt_token_here'}}
|
||||
result = input_source.get_input_value(context=test_context)
|
||||
|
||||
# Verify backend was called with workload_identity_token
|
||||
assert result == 'test_value'
|
||||
call_kwargs = mock_backend.call_args[1]
|
||||
assert call_kwargs['workload_identity_token'] == 'jwt_token_here'
|
||||
assert call_kwargs['key'] == 'test_key'
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_credential_input_source_skips_workload_identity_token_when_flag_disabled(vault_credential, external_credential, mocker):
|
||||
"""Test that workload_identity_token is NOT passed when flag is disabled."""
|
||||
with feature_flag_disabled('FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED'):
|
||||
# Create an input source
|
||||
input_source = CredentialInputSource.objects.create(
|
||||
target_credential=vault_credential,
|
||||
source_credential=external_credential,
|
||||
input_field_name='vault_password',
|
||||
metadata={'key': 'test_key'},
|
||||
)
|
||||
# Mock the credential plugin backend
|
||||
mock_backend = mocker.patch.object(external_credential.credential_type.plugin, 'backend', autospec=True, return_value='test_value')
|
||||
# Call with context containing workload_identity_token but NO internal field defined,
|
||||
# simulating a flag-disabled scenario where tokens are not generated upstream
|
||||
test_context = {input_source.pk: {'workload_identity_token': 'jwt_token_here'}}
|
||||
result = input_source.get_input_value(context=test_context)
|
||||
# Verify backend was called WITHOUT workload_identity_token since the credential type
|
||||
# does not define it as an internal field (flag-disabled path doesn't register it)
|
||||
assert result == 'test_value'
|
||||
call_kwargs = mock_backend.call_args[1]
|
||||
assert 'workload_identity_token' not in call_kwargs
|
||||
assert call_kwargs['key'] == 'test_key'
|
||||
|
||||
@@ -463,6 +463,26 @@ class TestInventorySourceCredential:
|
||||
assert 'Cloud-based inventory sources (such as ec2)' in r.data['credential'][0]
|
||||
assert 'require credentials for the matching cloud service' in r.data['credential'][0]
|
||||
|
||||
def test_credential_dict_value_returns_400(self, inventory, admin_user, put):
|
||||
"""Passing a dict for the credential field should return 400, not 500.
|
||||
|
||||
Reproduces a bug where int() raises TypeError on non-scalar types
|
||||
(dict, list) which was uncaught, resulting in a 500 Internal Server Error.
|
||||
"""
|
||||
inv_src = InventorySource.objects.create(name='test-src', inventory=inventory, source='ec2')
|
||||
r = put(
|
||||
url=reverse('api:inventory_source_detail', kwargs={'pk': inv_src.pk}),
|
||||
data={
|
||||
'name': 'test-src',
|
||||
'inventory': inventory.pk,
|
||||
'source': 'ec2',
|
||||
'credential': {'username': 'admin', 'password': 'secret'},
|
||||
},
|
||||
user=admin_user,
|
||||
expect=400,
|
||||
)
|
||||
assert r.status_code == 400
|
||||
|
||||
def test_vault_credential_not_allowed(self, project, inventory, vault_credential, admin_user, post):
|
||||
"""Vault credentials cannot be associated via the deprecated field"""
|
||||
# TODO: when feature is added, add tests to use the related credentials
|
||||
|
||||
@@ -0,0 +1,163 @@
|
||||
"""
|
||||
Tests for OIDC workload identity credential type feature flag.
|
||||
|
||||
The FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED flag is an install-time flag that
|
||||
controls whether OIDC credential types are loaded into the registry at startup.
|
||||
When disabled, OIDC credential types are not loaded and do not exist in the database.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest import mock
|
||||
|
||||
from django.test import override_settings
|
||||
|
||||
from awx.main.constants import OIDC_CREDENTIAL_TYPE_NAMESPACES
|
||||
from awx.main.models.credential import CredentialType, ManagedCredentialType, load_credentials
|
||||
from awx.api.versioning import reverse
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def reload_credentials_with_flag(django_db_setup, django_db_blocker):
|
||||
"""
|
||||
Fixture that reloads credentials with a specific flag state.
|
||||
This simulates what happens at application startup.
|
||||
"""
|
||||
# Save original registry state
|
||||
original_registry = ManagedCredentialType.registry.copy()
|
||||
|
||||
def _reload(flag_enabled):
|
||||
with django_db_blocker.unblock():
|
||||
# Clear the entire registry before reloading
|
||||
ManagedCredentialType.registry.clear()
|
||||
|
||||
# Reload credentials with the specified flag state
|
||||
with override_settings(FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED=flag_enabled):
|
||||
with mock.patch('awx.main.models.credential.detect_server_product_name', return_value='NOT_AWX'):
|
||||
load_credentials()
|
||||
|
||||
# Sync to database
|
||||
CredentialType.setup_tower_managed_defaults(lock=False)
|
||||
|
||||
# In tests, the session fixture pre-loads all credential types into the DB.
|
||||
# Remove OIDC types when testing the disabled state so the API test is accurate.
|
||||
if not flag_enabled:
|
||||
CredentialType.objects.filter(namespace__in=OIDC_CREDENTIAL_TYPE_NAMESPACES).delete()
|
||||
|
||||
yield _reload
|
||||
|
||||
# Restore original registry state after tests
|
||||
ManagedCredentialType.registry.clear()
|
||||
ManagedCredentialType.registry.update(original_registry)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def isolated_registry():
|
||||
"""Save and restore the ManagedCredentialType registry, with full isolation via mocked entry_points."""
|
||||
original_registry = ManagedCredentialType.registry.copy()
|
||||
ManagedCredentialType.registry.clear()
|
||||
yield
|
||||
ManagedCredentialType.registry.clear()
|
||||
ManagedCredentialType.registry.update(original_registry)
|
||||
|
||||
|
||||
def _make_mock_entry_point(name):
|
||||
"""Create a mock entry point that mimics a credential plugin."""
|
||||
ep = mock.MagicMock()
|
||||
ep.name = name
|
||||
ep.value = f'test_plugin:{name}'
|
||||
plugin = mock.MagicMock(spec=[])
|
||||
ep.load.return_value = plugin
|
||||
return ep
|
||||
|
||||
|
||||
def _mock_entry_points_factory(managed_names, supported_names):
|
||||
"""Return a side_effect function for mocking entry_points() with controlled plugins."""
|
||||
managed = [_make_mock_entry_point(n) for n in managed_names]
|
||||
supported = [_make_mock_entry_point(n) for n in supported_names]
|
||||
|
||||
def _entry_points(group):
|
||||
if group == 'awx_plugins.managed_credentials':
|
||||
return managed
|
||||
elif group == 'awx_plugins.managed_credentials.supported':
|
||||
return supported
|
||||
return []
|
||||
|
||||
return _entry_points
|
||||
|
||||
|
||||
# --- Unit tests for load_credentials() registry behavior ---
|
||||
|
||||
|
||||
def test_oidc_types_in_registry_when_flag_enabled(isolated_registry):
|
||||
"""Test that OIDC credential types are added to the registry when flag is enabled."""
|
||||
mock_eps = _mock_entry_points_factory(
|
||||
managed_names=['ssh', 'vault'],
|
||||
supported_names=['hashivault-kv-oidc', 'hashivault-ssh-oidc'],
|
||||
)
|
||||
with override_settings(FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED=True):
|
||||
with mock.patch('awx.main.models.credential.detect_server_product_name', return_value='NOT_AWX'):
|
||||
with mock.patch('awx.main.models.credential.entry_points', side_effect=mock_eps):
|
||||
load_credentials()
|
||||
|
||||
for ns in OIDC_CREDENTIAL_TYPE_NAMESPACES:
|
||||
assert ns in ManagedCredentialType.registry, f"{ns} should be in registry when flag is enabled"
|
||||
assert 'ssh' in ManagedCredentialType.registry
|
||||
assert 'vault' in ManagedCredentialType.registry
|
||||
|
||||
|
||||
def test_oidc_types_not_in_registry_when_flag_disabled(isolated_registry):
|
||||
"""Test that OIDC credential types are excluded from the registry when flag is disabled."""
|
||||
mock_eps = _mock_entry_points_factory(
|
||||
managed_names=['ssh', 'vault'],
|
||||
supported_names=['hashivault-kv-oidc', 'hashivault-ssh-oidc'],
|
||||
)
|
||||
with override_settings(FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED=False):
|
||||
with mock.patch('awx.main.models.credential.detect_server_product_name', return_value='NOT_AWX'):
|
||||
with mock.patch('awx.main.models.credential.entry_points', side_effect=mock_eps):
|
||||
load_credentials()
|
||||
|
||||
for ns in OIDC_CREDENTIAL_TYPE_NAMESPACES:
|
||||
assert ns not in ManagedCredentialType.registry, f"{ns} should not be in registry when flag is disabled"
|
||||
# Non-OIDC types should still be loaded
|
||||
assert 'ssh' in ManagedCredentialType.registry
|
||||
assert 'vault' in ManagedCredentialType.registry
|
||||
|
||||
|
||||
def test_oidc_namespaces_constant():
|
||||
"""Test that OIDC_CREDENTIAL_TYPE_NAMESPACES contains the expected namespaces."""
|
||||
assert 'hashivault-kv-oidc' in OIDC_CREDENTIAL_TYPE_NAMESPACES
|
||||
assert 'hashivault-ssh-oidc' in OIDC_CREDENTIAL_TYPE_NAMESPACES
|
||||
assert len(OIDC_CREDENTIAL_TYPE_NAMESPACES) == 2
|
||||
|
||||
|
||||
# --- Functional API tests ---
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_oidc_types_loaded_when_flag_enabled(get, admin, reload_credentials_with_flag):
|
||||
"""Test that OIDC credential types are visible in the API when flag is enabled."""
|
||||
reload_credentials_with_flag(flag_enabled=True)
|
||||
|
||||
response = get(reverse('api:credential_type_list'), admin)
|
||||
assert response.status_code == 200
|
||||
|
||||
namespaces = [ct['namespace'] for ct in response.data['results']]
|
||||
assert 'hashivault-kv-oidc' in namespaces
|
||||
assert 'hashivault-ssh-oidc' in namespaces
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_oidc_types_not_loaded_when_flag_disabled(get, admin, reload_credentials_with_flag):
|
||||
"""Test that OIDC credential types are not visible in the API when flag is disabled."""
|
||||
reload_credentials_with_flag(flag_enabled=False)
|
||||
|
||||
response = get(reverse('api:credential_type_list'), admin)
|
||||
assert response.status_code == 200
|
||||
|
||||
namespaces = [ct['namespace'] for ct in response.data['results']]
|
||||
assert 'hashivault-kv-oidc' not in namespaces
|
||||
assert 'hashivault-ssh-oidc' not in namespaces
|
||||
|
||||
# Verify they're also not in the database
|
||||
assert not CredentialType.objects.filter(namespace='hashivault-kv-oidc').exists()
|
||||
assert not CredentialType.objects.filter(namespace='hashivault-ssh-oidc').exists()
|
||||
@@ -1,4 +1,3 @@
|
||||
from datetime import date
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
@@ -253,7 +252,7 @@ def test_user_verify_attribute_created(admin, get):
|
||||
resp = get(reverse('api:user_detail', kwargs={'pk': admin.pk}), admin)
|
||||
assert resp.data['created'] == admin.date_joined
|
||||
|
||||
past = date(2020, 1, 1).isoformat()
|
||||
past = "2020-01-01T00:00:00Z"
|
||||
for op, count in (('gt', 1), ('lt', 0)):
|
||||
resp = get(reverse('api:user_list') + f'?created__{op}={past}', admin)
|
||||
assert resp.data['count'] == count
|
||||
|
||||
@@ -48,7 +48,7 @@ class TestCallbackBrokerWorker(TransactionTestCase):
|
||||
worker = CallbackBrokerWorker()
|
||||
events = [InventoryUpdateEvent(uuid=str(uuid4()), **self.event_create_kwargs())]
|
||||
worker.buff = {InventoryUpdateEvent: events}
|
||||
worker.flush()
|
||||
worker.flush(force=True)
|
||||
assert worker.buff.get(InventoryUpdateEvent, []) == []
|
||||
assert InventoryUpdateEvent.objects.filter(uuid=events[0].uuid).count() == 1
|
||||
|
||||
@@ -61,7 +61,7 @@ class TestCallbackBrokerWorker(TransactionTestCase):
|
||||
InventoryUpdateEvent(uuid=str(uuid4()), stdout='good2', **kwargs),
|
||||
]
|
||||
worker.buff = {InventoryUpdateEvent: events.copy()}
|
||||
worker.flush()
|
||||
worker.flush(force=True)
|
||||
assert InventoryUpdateEvent.objects.filter(uuid=events[0].uuid).count() == 1
|
||||
assert InventoryUpdateEvent.objects.filter(uuid=events[1].uuid).count() == 0
|
||||
assert InventoryUpdateEvent.objects.filter(uuid=events[2].uuid).count() == 1
|
||||
@@ -71,7 +71,7 @@ class TestCallbackBrokerWorker(TransactionTestCase):
|
||||
worker = CallbackBrokerWorker()
|
||||
events = [InventoryUpdateEvent(uuid=str(uuid4()), **self.event_create_kwargs())]
|
||||
worker.buff = {InventoryUpdateEvent: events.copy()}
|
||||
worker.flush()
|
||||
worker.flush(force=True)
|
||||
|
||||
# put current saved event in buffer (error case)
|
||||
worker.buff = {InventoryUpdateEvent: [InventoryUpdateEvent.objects.get(uuid=events[0].uuid)]}
|
||||
@@ -113,7 +113,7 @@ class TestCallbackBrokerWorker(TransactionTestCase):
|
||||
|
||||
with mock.patch.object(InventoryUpdateEvent.objects, 'bulk_create', side_effect=ValueError):
|
||||
with mock.patch.object(events[0], 'save', side_effect=ValueError):
|
||||
worker.flush()
|
||||
worker.flush(force=True)
|
||||
|
||||
assert "\x00" not in events[0].stdout
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ def test_feature_flags_list_endpoint_override(get, flag_val):
|
||||
bob = User.objects.create(username='bob', password='test_user', is_superuser=True)
|
||||
|
||||
AAPFlag.objects.all().delete()
|
||||
flag_name = "FEATURE_DISPATCHERD_ENABLED"
|
||||
flag_name = "FEATURE_INDIRECT_NODE_COUNTING_ENABLED"
|
||||
setattr(settings, flag_name, flag_val)
|
||||
seed_feature_flags()
|
||||
url = "/api/v2/feature_flags/states/"
|
||||
|
||||
17
awx/main/tests/functional/management/test_dispatcherd.py
Normal file
17
awx/main/tests/functional/management/test_dispatcherd.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import pytest
|
||||
|
||||
from awx.main.dispatch.config import get_dispatcherd_config
|
||||
from awx.main.management.commands.dispatcherd import _hash_config
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_dispatcherd_config_hash_is_stable(settings, monkeypatch):
|
||||
monkeypatch.setenv('AWX_COMPONENT', 'dispatcher')
|
||||
settings.CLUSTER_HOST_ID = 'test-node'
|
||||
settings.JOB_EVENT_WORKERS = 1
|
||||
settings.DISPATCHER_SCHEDULE = {}
|
||||
|
||||
config_one = get_dispatcherd_config(for_service=True)
|
||||
config_two = get_dispatcherd_config(for_service=True)
|
||||
|
||||
assert _hash_config(config_one) == _hash_config(config_two)
|
||||
@@ -3,19 +3,33 @@ import pytest
|
||||
# AWX
|
||||
from awx.main.ha import is_ha_environment
|
||||
from awx.main.models.ha import Instance
|
||||
from awx.main.dispatch.pool import get_auto_max_workers
|
||||
from awx.main.utils.common import get_auto_max_workers
|
||||
|
||||
# Django
|
||||
from django.test.utils import override_settings
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_multiple_instances():
|
||||
for i in range(2):
|
||||
def test_multiple_hybrid_instances():
|
||||
for i in range(3):
|
||||
Instance.objects.create(hostname=f'foo{i}', node_type='hybrid')
|
||||
assert is_ha_environment()
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_double_control_instances():
|
||||
for i in range(2):
|
||||
Instance.objects.create(hostname=f'foo{i}', node_type='control')
|
||||
assert is_ha_environment()
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_mix_hybrid_control_instances():
|
||||
Instance.objects.create(hostname='control_node', node_type='control')
|
||||
Instance.objects.create(hostname='hybrid_node', node_type='hybrid')
|
||||
assert is_ha_environment()
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_db_localhost():
|
||||
Instance.objects.create(hostname='foo', node_type='hybrid')
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import pytest
|
||||
|
||||
from awx.main.models import JobTemplate, Job, JobHostSummary, WorkflowJob, Inventory, Project, Organization
|
||||
from awx.main.models import JobTemplate, Job, JobHostSummary, WorkflowJob, Inventory, Host, Project, Organization
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@@ -87,3 +87,47 @@ class TestSlicingModels:
|
||||
|
||||
unified_job = job_template.create_unified_job(job_slice_count=2)
|
||||
assert isinstance(unified_job, Job)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestGetSourceHostsForConstructedInventory:
|
||||
"""Tests for Job.get_source_hosts_for_constructed_inventory"""
|
||||
|
||||
def test_returns_source_hosts_via_instance_id(self):
|
||||
"""Constructed hosts with instance_id pointing to source hosts are resolved correctly."""
|
||||
org = Organization.objects.create(name='test-org')
|
||||
inv_input = Inventory.objects.create(organization=org, name='input-inv')
|
||||
source_host1 = inv_input.hosts.create(name='host1')
|
||||
source_host2 = inv_input.hosts.create(name='host2')
|
||||
|
||||
inv_constructed = Inventory.objects.create(organization=org, name='constructed-inv', kind='constructed')
|
||||
inv_constructed.input_inventories.add(inv_input)
|
||||
Host.objects.create(inventory=inv_constructed, name='host1', instance_id=str(source_host1.id))
|
||||
Host.objects.create(inventory=inv_constructed, name='host2', instance_id=str(source_host2.id))
|
||||
|
||||
job = Job.objects.create(name='test-job', inventory=inv_constructed)
|
||||
result = job.get_source_hosts_for_constructed_inventory()
|
||||
|
||||
assert set(result.values_list('id', flat=True)) == {source_host1.id, source_host2.id}
|
||||
|
||||
def test_no_inventory_returns_empty(self):
|
||||
"""A job with no inventory returns an empty queryset."""
|
||||
job = Job.objects.create(name='test-job')
|
||||
result = job.get_source_hosts_for_constructed_inventory()
|
||||
assert result.count() == 0
|
||||
|
||||
def test_ignores_hosts_without_instance_id(self):
|
||||
"""Hosts with empty instance_id are excluded from the result."""
|
||||
org = Organization.objects.create(name='test-org')
|
||||
inv_input = Inventory.objects.create(organization=org, name='input-inv')
|
||||
source_host = inv_input.hosts.create(name='host1')
|
||||
|
||||
inv_constructed = Inventory.objects.create(organization=org, name='constructed-inv', kind='constructed')
|
||||
inv_constructed.input_inventories.add(inv_input)
|
||||
Host.objects.create(inventory=inv_constructed, name='host1', instance_id=str(source_host.id))
|
||||
Host.objects.create(inventory=inv_constructed, name='host-no-ref', instance_id='')
|
||||
|
||||
job = Job.objects.create(name='test-job', inventory=inv_constructed)
|
||||
result = job.get_source_hosts_for_constructed_inventory()
|
||||
|
||||
assert list(result.values_list('id', flat=True)) == [source_host.id]
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import itertools
|
||||
import pytest
|
||||
from uuid import uuid4
|
||||
|
||||
# CRUM
|
||||
from crum import impersonate
|
||||
@@ -33,6 +34,64 @@ def test_soft_unique_together(post, project, admin_user):
|
||||
assert 'combination already exists' in str(r.data)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestJobCancel:
|
||||
"""
|
||||
Coverage for UnifiedJob.cancel, focused on interaction with dispatcherd objects.
|
||||
Using mocks for the dispatcherd objects, because tests by default use a no-op broker.
|
||||
"""
|
||||
|
||||
def test_cancel_sets_flag_and_clears_start_args(self, mocker):
|
||||
job = Job.objects.create(status='running', name='foo-job', celery_task_id=str(uuid4()), controller_node='foo', start_args='{"secret": "value"}')
|
||||
job.websocket_emit_status = mocker.MagicMock()
|
||||
|
||||
assert job.can_cancel is True
|
||||
assert job.cancel_flag is False
|
||||
|
||||
job.cancel()
|
||||
job.refresh_from_db()
|
||||
|
||||
assert job.cancel_flag is True
|
||||
assert job.start_args == ''
|
||||
|
||||
def test_cancel_sets_job_explanation(self, mocker):
|
||||
job = Job.objects.create(status='running', name='foo-job', celery_task_id=str(uuid4()), controller_node='foo')
|
||||
job.websocket_emit_status = mocker.MagicMock()
|
||||
job_explanation = 'giggity giggity'
|
||||
|
||||
job.cancel(job_explanation=job_explanation)
|
||||
job.refresh_from_db()
|
||||
|
||||
assert job.job_explanation == job_explanation
|
||||
|
||||
def test_cancel_sends_control_message(self, mocker):
|
||||
celery_task_id = str(uuid4())
|
||||
job = Job.objects.create(status='running', name='foo-job', celery_task_id=celery_task_id, controller_node='foo')
|
||||
job.websocket_emit_status = mocker.MagicMock()
|
||||
control = mocker.MagicMock()
|
||||
get_control = mocker.patch('awx.main.models.unified_jobs.get_control_from_settings', return_value=control)
|
||||
|
||||
job.cancel()
|
||||
|
||||
get_control.assert_called_once_with(default_publish_channel='foo')
|
||||
control.control.assert_called_once_with('cancel', data={'uuid': celery_task_id})
|
||||
|
||||
def test_cancel_refreshes_task_id_before_sending_control(self, mocker):
|
||||
job = Job.objects.create(status='pending', name='foo-job', celery_task_id='', controller_node='bar')
|
||||
job.websocket_emit_status = mocker.MagicMock()
|
||||
celery_task_id = str(uuid4())
|
||||
Job.objects.filter(pk=job.pk).update(status='running', celery_task_id=celery_task_id)
|
||||
control = mocker.MagicMock()
|
||||
get_control = mocker.patch('awx.main.models.unified_jobs.get_control_from_settings', return_value=control)
|
||||
refresh_spy = mocker.spy(job, 'refresh_from_db')
|
||||
|
||||
job.cancel()
|
||||
|
||||
refresh_spy.assert_called_once_with(fields=['celery_task_id', 'controller_node'])
|
||||
get_control.assert_called_once_with(default_publish_channel='bar')
|
||||
control.control.assert_called_once_with('cancel', data={'uuid': celery_task_id})
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCreateUnifiedJob:
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,274 @@
|
||||
# Generated by Claude Opus 4.6 (claude-opus-4-6)
|
||||
#
|
||||
# Test file for cancel + dependency chain behavior and workflow cancel propagation.
|
||||
#
|
||||
# These tests verify:
|
||||
#
|
||||
# 1. TaskManager.process_job_dep_failures() correctly distinguishes canceled vs
|
||||
# failed dependencies in the job_explanation message.
|
||||
#
|
||||
# 2. TaskManager.process_pending_tasks() transitions pending jobs with
|
||||
# cancel_flag=True directly to canceled status.
|
||||
#
|
||||
# 3. WorkflowManager + TaskManager together cancel all spawned jobs in a
|
||||
# workflow and finalize the workflow as canceled.
|
||||
|
||||
import pytest
|
||||
from unittest import mock
|
||||
|
||||
from awx.main.scheduler import TaskManager, DependencyManager, WorkflowManager
|
||||
from awx.main.models import JobTemplate, ProjectUpdate, WorkflowApproval, WorkflowJobTemplate
|
||||
from awx.main.models.workflow import WorkflowApprovalTemplate
|
||||
from . import create_job
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def scm_on_launch_objects(job_template_factory):
|
||||
"""Create a job template with a project configured for scm_update_on_launch."""
|
||||
objects = job_template_factory(
|
||||
'jt',
|
||||
organization='org1',
|
||||
project='proj',
|
||||
inventory='inv',
|
||||
credential='cred',
|
||||
)
|
||||
p = objects.project
|
||||
p.scm_update_on_launch = True
|
||||
p.scm_update_cache_timeout = 0
|
||||
p.save(skip_update=True)
|
||||
return objects
|
||||
|
||||
|
||||
def _create_job_with_dependency(objects):
|
||||
"""Create a pending job and run DependencyManager to produce its project update dependency.
|
||||
|
||||
Returns (job, project_update).
|
||||
"""
|
||||
j = create_job(objects.job_template, dependencies_processed=False)
|
||||
with mock.patch('awx.main.models.unified_jobs.UnifiedJobTemplate.update'):
|
||||
DependencyManager().schedule()
|
||||
assert j.dependent_jobs.count() == 1
|
||||
pu = j.dependent_jobs.first()
|
||||
assert isinstance(pu.get_real_instance(), ProjectUpdate)
|
||||
return j, pu
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCanceledDependencyFailsBlockedJob:
|
||||
"""When a dependency project update is canceled or failed, the task manager
|
||||
should fail the blocked job via process_job_dep_failures."""
|
||||
|
||||
def test_canceled_dependency_fails_blocked_job(self, controlplane_instance_group, scm_on_launch_objects):
|
||||
"""A canceled dependency causes the blocked job to be failed with
|
||||
a 'Previous Task Canceled' explanation."""
|
||||
j, pu = _create_job_with_dependency(scm_on_launch_objects)
|
||||
|
||||
ProjectUpdate.objects.filter(pk=pu.pk).update(status='canceled', cancel_flag=True)
|
||||
|
||||
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
|
||||
TaskManager().schedule()
|
||||
|
||||
j.refresh_from_db()
|
||||
assert j.status == 'failed'
|
||||
assert 'Previous Task Canceled' in j.job_explanation
|
||||
|
||||
def test_failed_dependency_fails_blocked_job(self, controlplane_instance_group, scm_on_launch_objects):
|
||||
"""A failed dependency causes the blocked job to be failed with
|
||||
a 'Previous Task Failed' explanation."""
|
||||
j, pu = _create_job_with_dependency(scm_on_launch_objects)
|
||||
|
||||
ProjectUpdate.objects.filter(pk=pu.pk).update(status='failed')
|
||||
|
||||
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
|
||||
TaskManager().schedule()
|
||||
|
||||
j.refresh_from_db()
|
||||
assert j.status == 'failed'
|
||||
assert 'Previous Task Failed' in j.job_explanation
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestTaskManagerCancelsPendingJobsWithCancelFlag:
|
||||
"""When the task manager encounters pending jobs that have cancel_flag set,
|
||||
it should transition them directly to canceled status."""
|
||||
|
||||
def test_pending_job_with_cancel_flag_is_canceled(self, controlplane_instance_group, job_template_factory):
|
||||
"""A pending job with cancel_flag=True is transitioned to canceled
|
||||
by the task manager without being started."""
|
||||
objects = job_template_factory(
|
||||
'jt',
|
||||
organization='org1',
|
||||
project='proj',
|
||||
inventory='inv',
|
||||
credential='cred',
|
||||
)
|
||||
j = create_job(objects.job_template)
|
||||
j.cancel_flag = True
|
||||
j.save(update_fields=['cancel_flag'])
|
||||
|
||||
with mock.patch("awx.main.scheduler.TaskManager.start_task") as mock_start:
|
||||
TaskManager().schedule()
|
||||
|
||||
j.refresh_from_db()
|
||||
assert j.status == 'canceled'
|
||||
assert 'canceled before it started' in j.job_explanation
|
||||
assert not mock_start.called
|
||||
|
||||
def test_pending_job_without_cancel_flag_is_not_canceled(self, controlplane_instance_group, job_template_factory):
|
||||
"""A normal pending job without cancel_flag should not be canceled
|
||||
by the task manager (sanity check)."""
|
||||
objects = job_template_factory(
|
||||
'jt',
|
||||
organization='org1',
|
||||
project='proj',
|
||||
inventory='inv',
|
||||
credential='cred',
|
||||
)
|
||||
j = create_job(objects.job_template)
|
||||
|
||||
with mock.patch("awx.main.scheduler.TaskManager.start_task"):
|
||||
TaskManager().schedule()
|
||||
|
||||
j.refresh_from_db()
|
||||
assert j.status != 'canceled'
|
||||
|
||||
def test_multiple_pending_jobs_with_cancel_flag_bulk_canceled(self, controlplane_instance_group, job_template_factory):
|
||||
"""Multiple pending jobs with cancel_flag=True are all transitioned
|
||||
to canceled in a single task manager cycle."""
|
||||
objects = job_template_factory(
|
||||
'jt',
|
||||
organization='org1',
|
||||
project='proj',
|
||||
inventory='inv',
|
||||
credential='cred',
|
||||
)
|
||||
jt = objects.job_template
|
||||
jt.allow_simultaneous = True
|
||||
jt.save()
|
||||
|
||||
jobs = []
|
||||
for _ in range(3):
|
||||
j = create_job(jt)
|
||||
j.cancel_flag = True
|
||||
j.save(update_fields=['cancel_flag'])
|
||||
jobs.append(j)
|
||||
|
||||
with mock.patch("awx.main.scheduler.TaskManager.start_task") as mock_start:
|
||||
TaskManager().schedule()
|
||||
|
||||
for j in jobs:
|
||||
j.refresh_from_db()
|
||||
assert j.status == 'canceled', f"Job {j.id} should be canceled but is {j.status}"
|
||||
assert 'canceled before it started' in j.job_explanation
|
||||
assert not mock_start.called
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestWorkflowCancelFinalizesWorkflow:
    """Cancel path for workflow jobs.

    The WorkflowManager cancels the spawned child jobs (setting cancel_flag),
    the TaskManager moves those still-pending jobs to canceled, and a final
    WorkflowManager pass marks the workflow itself as canceled.
    """

    def test_cancel_workflow_with_parallel_nodes(self, inventory, project, controlplane_instance_group):
        """Cancel a workflow of parallel nodes while one job is running and
        check that every job and the workflow end up canceled."""
        node_count = 4
        jt = JobTemplate.objects.create(allow_simultaneous=False, inventory=inventory, project=project, playbook='helloworld.yml')
        wfjt = WorkflowJobTemplate.objects.create(name='test-cancel-wf')
        for _ in range(node_count):
            wfjt.workflow_nodes.create(unified_job_template=jt)

        workflow_job = wfjt.create_unified_job()
        workflow_job.signal_start()

        # First TaskManager pass: the workflow job goes to running via start_task.
        TaskManager().schedule()
        workflow_job.refresh_from_db()
        assert workflow_job.status == 'running'

        # The WorkflowManager spawns one job per node.
        WorkflowManager().schedule()
        assert jt.jobs.count() == node_count

        # Pretend the oldest job is running (allow_simultaneous=False keeps the others blocked).
        running_job = jt.jobs.order_by('created').first()
        simulated_state = {
            'status': 'running',
            'celery_task_id': 'fake-task-id',
            'controller_node': 'test-node',
        }
        for field_name, value in simulated_state.items():
            setattr(running_job, field_name, value)
        running_job.save(update_fields=list(simulated_state))

        # Request cancellation of the workflow.
        workflow_job.cancel_flag = True
        workflow_job.save(update_fields=['cancel_flag'])

        # The WorkflowManager notices cancel_flag and calls cancel_node_jobs(),
        # which sets cancel_flag on all child jobs.
        with mock.patch('awx.main.models.unified_jobs.UnifiedJob.cancel_dispatcher_process'):
            WorkflowManager().schedule()

        # No dispatcher exists in tests, so the running job never really stops; fake it.
        running_job.status = 'canceled'
        running_job.save(update_fields=['status'])

        # TaskManager handles the remaining pending jobs that carry cancel_flag.
        with mock.patch("awx.main.scheduler.TaskManager.start_task") as start_task_mock:
            DependencyManager().schedule()
            TaskManager().schedule()

        for child in jt.jobs.all():
            child.refresh_from_db()
            assert child.status == 'canceled', f"Job {child.id} should be canceled but is {child.status}"
        assert not start_task_mock.called

        # Last WorkflowManager pass finalizes the workflow.
        WorkflowManager().schedule()
        workflow_job.refresh_from_db()
        assert workflow_job.status == 'canceled'

    def test_cancel_workflow_with_approval_node(self, controlplane_instance_group):
        """Cancel a workflow that has a pending approval node and a downstream
        node.

        The approval must be canceled directly by the WorkflowManager
        (approvals are excluded from TaskManager), the downstream node must be
        marked do_not_run, and the workflow must finalize as canceled.
        """
        root_template = WorkflowApprovalTemplate.objects.create(name='test-approval', timeout=0)
        wfjt = WorkflowJobTemplate.objects.create(name='test-cancel-approval-wf')
        root_node = wfjt.workflow_nodes.create(unified_job_template=root_template)

        # The downstream node is simply another approval, to keep the setup small.
        downstream_template = WorkflowApprovalTemplate.objects.create(name='test-downstream', timeout=0)
        downstream_node = wfjt.workflow_nodes.create(unified_job_template=downstream_template)
        root_node.success_nodes.add(downstream_node)

        workflow_job = wfjt.create_unified_job()
        workflow_job.signal_start()

        # TaskManager moves the workflow to running.
        TaskManager().schedule()
        workflow_job.refresh_from_db()
        assert workflow_job.status == 'running'

        # Only the root approval is spawned; the downstream node waits.
        WorkflowManager().schedule()
        spawned_approvals = WorkflowApproval.objects.filter(unified_job_node__workflow_job=workflow_job)
        assert spawned_approvals.count() == 1

        approval_job = WorkflowApproval.objects.get(unified_job_node__workflow_job=workflow_job)
        assert approval_job.status == 'pending'

        # Request cancellation of the workflow.
        workflow_job.cancel_flag = True
        workflow_job.save(update_fields=['cancel_flag'])

        # A single WorkflowManager pass should cancel the approval and mark
        # the downstream node as do_not_run.
        WorkflowManager().schedule()

        approval_job.refresh_from_db()
        assert approval_job.status == 'canceled', f"Approval should be canceled directly by WorkflowManager but is {approval_job.status}"

        # The downstream node is marked do_not_run and never got a job.
        downstream_wj_node = workflow_job.workflow_nodes.get(unified_job_template=downstream_template)
        assert downstream_wj_node.do_not_run is True
        assert downstream_wj_node.job is None

        # The workflow is finalized as canceled in that same pass.
        workflow_job.refresh_from_db()
        assert workflow_job.status == 'canceled'
|
||||
223
awx/main/tests/functional/tasks/test_fact_cache.py
Normal file
223
awx/main/tests/functional/tasks/test_fact_cache.py
Normal file
@@ -0,0 +1,223 @@
|
||||
"""Functional tests for start_fact_cache / finish_fact_cache.
|
||||
|
||||
These tests use real database objects (via pytest-django) and real files
|
||||
on disk, but do not launch jobs or subprocesses. Fact files are written
|
||||
by start_fact_cache and then manipulated to simulate ansible output
|
||||
before calling finish_fact_cache.
|
||||
|
||||
Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from datetime import timedelta
|
||||
|
||||
import pytest
|
||||
|
||||
from django.utils.timezone import now
|
||||
|
||||
from awx.main.models import Host, Inventory
|
||||
from awx.main.tasks.facts import start_fact_cache, finish_fact_cache
|
||||
|
||||
|
||||
@pytest.fixture
def artifacts_dir(tmp_path):
    """Create an ``artifacts`` directory under ``tmp_path`` and return its path as a string."""
    artifacts = tmp_path.joinpath('artifacts')
    artifacts.mkdir()
    return str(artifacts)
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestFinishFactCacheScoping:
    """finish_fact_cache may only touch hosts that belong to the hosts it is given."""

    def test_same_hostname_different_inventories(self, organization, artifacts_dir):
        """Two inventories each own a host named 'shared'; only the targeted
        inventory's host may receive the new facts.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        targeted_inv = Inventory.objects.create(organization=organization, name='scope-inv1')
        other_inv = Inventory.objects.create(organization=organization, name='scope-inv2')

        targeted_host = targeted_inv.hosts.create(name='shared')
        other_host = other_inv.hosts.create(name='shared')

        # Seed both hosts with the same starting facts.
        for seeded in (targeted_host, other_host):
            seeded.ansible_facts = {'original': True}
            seeded.ansible_facts_modified = now()
            seeded.save(update_fields=['ansible_facts', 'ansible_facts_modified'])

        # start_fact_cache writes the reference files for the targeted inventory's hosts.
        start_fact_cache(targeted_inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=targeted_inv.id)

        # Stand in for ansible: overwrite the fact file for 'shared' and push
        # its timestamps 60 seconds into the future.
        shared_fact_path = os.path.join(artifacts_dir, 'fact_cache', 'shared')
        future_stamp = time.time() + 60
        with open(shared_fact_path, 'w') as fact_handle:
            json.dump({'updated': True}, fact_handle)
        os.utime(shared_fact_path, (future_stamp, future_stamp))

        # Finish using the targeted inventory's hosts.
        finish_fact_cache(targeted_inv.hosts, artifacts_dir=artifacts_dir, inventory_id=targeted_inv.id)

        targeted_host.refresh_from_db()
        other_host.refresh_from_db()

        assert targeted_host.ansible_facts == {'updated': True}
        assert other_host.ansible_facts == {'original': True}, 'Host in a different inventory was modified despite not being in the queryset'
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestFinishFactCacheConcurrentProtection:
    """finish_fact_cache must leave alone facts that a concurrent job wrote."""

    def test_no_clear_when_no_file_was_written(self, organization, artifacts_dir):
        """A host that had no facts at start must not be cleared just because
        its fact file is absent at finish.

        Generated by Claude Opus 4.6 (claude-opus-4-6).

        start_fact_cache records hosts_cached[host] = False for hosts with no
        prior facts (no file written). finish_fact_cache should skip the clear
        for these hosts because the missing file is expected, not a clear signal.
        """
        inv = Inventory.objects.create(organization=organization, name='concurrent-inv')
        target = inv.hosts.create(name='target')

        job_created = now() - timedelta(minutes=5)

        # The host has no facts yet, so start_fact_cache writes no file for it.
        start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id)

        # A concurrent job stores facts for this host AFTER our job was created.
        target.ansible_facts = {'from_concurrent_job': True}
        target.ansible_facts_modified = now()
        target.save(update_fields=['ansible_facts', 'ansible_facts_modified'])

        # No fact file exists because none was ever written; the host should
        # be skipped entirely.
        finish_kwargs = {
            'artifacts_dir': artifacts_dir,
            'inventory_id': inv.id,
            'job_created': job_created,
        }
        finish_fact_cache(inv.hosts, **finish_kwargs)

        target.refresh_from_db()
        assert target.ansible_facts == {'from_concurrent_job': True}, 'Facts were cleared for a host that never had a fact file written'

    def test_skip_clear_when_facts_modified_after_job_created(self, organization, artifacts_dir):
        """A fact file was written and later removed, but the host's facts were
        updated concurrently in the meantime: the clear must be skipped.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        inv = Inventory.objects.create(organization=organization, name='concurrent-written-inv')
        target = inv.hosts.create(name='target')

        an_hour_ago = now() - timedelta(hours=1)
        target.ansible_facts = {'original': True}
        target.ansible_facts_modified = an_hour_ago
        target.save(update_fields=['ansible_facts', 'ansible_facts_modified'])

        job_created = now() - timedelta(minutes=5)

        # The host has facts, so start_fact_cache writes a file for it.
        start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id)

        # Drop the fact file (ansible didn't target this host via --limit).
        fact_path = os.path.join(artifacts_dir, 'fact_cache', target.name)
        os.remove(fact_path)

        # A concurrent job stores facts for this host AFTER our job was created.
        target.ansible_facts = {'from_concurrent_job': True}
        target.ansible_facts_modified = now()
        target.save(update_fields=['ansible_facts', 'ansible_facts_modified'])

        finish_kwargs = {
            'artifacts_dir': artifacts_dir,
            'inventory_id': inv.id,
            'job_created': job_created,
        }
        finish_fact_cache(inv.hosts, **finish_kwargs)

        target.refresh_from_db()
        assert target.ansible_facts == {'from_concurrent_job': True}, 'Facts set by a concurrent job were cleared despite ansible_facts_modified > job_created'

    def test_clear_when_facts_predate_job(self, organization, artifacts_dir):
        """Facts older than the job are still cleared when the fact file is gone.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        inv = Inventory.objects.create(organization=organization, name='clear-inv')
        stale_host = inv.hosts.create(name='stale')

        an_hour_ago = now() - timedelta(hours=1)
        stale_host.ansible_facts = {'stale': True}
        stale_host.ansible_facts_modified = an_hour_ago
        stale_host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])

        job_created = now() - timedelta(minutes=5)

        start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id)

        # Deleting the file stands in for ansible's clear_facts.
        fact_path = os.path.join(artifacts_dir, 'fact_cache', stale_host.name)
        os.remove(fact_path)

        finish_kwargs = {
            'artifacts_dir': artifacts_dir,
            'inventory_id': inv.id,
            'job_created': job_created,
        }
        finish_fact_cache(inv.hosts, **finish_kwargs)

        stale_host.refresh_from_db()
        failure_message = 'Stale facts should have been cleared when the fact file is missing and ansible_facts_modified predates job_created'
        assert stale_host.ansible_facts == {}, failure_message
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestConstructedInventoryFactCache:
    """With a constructed-inventory queryset, finish_fact_cache must update the source hosts."""

    def test_facts_resolve_to_source_host(self, organization, artifacts_dir):
        """New facts land on the source host and never on the constructed copy.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        from django.db.models.functions import Cast

        input_inv = Inventory.objects.create(organization=organization, name='ci-input')
        source_host = input_inv.hosts.create(name='webserver')

        constructed_inv = Inventory.objects.create(organization=organization, name='ci-constructed', kind='constructed')
        constructed_inv.input_inventories.add(input_inv)
        # The constructed copy points back at its source through instance_id.
        constructed_host = Host.objects.create(
            inventory=constructed_inv,
            name='webserver',
            instance_id=str(source_host.id),
        )

        # Build the same queryset that get_hosts_for_fact_cache uses.
        id_field = Host._meta.get_field('id')
        source_ids = constructed_inv.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field))
        source_qs = Host.objects.filter(id__in=source_ids)

        # The source host needs initial facts so that start_fact_cache writes a file.
        source_host.ansible_facts = {'role': 'web'}
        source_host.ansible_facts_modified = now()
        source_host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])

        start_fact_cache(source_qs, artifacts_dir=artifacts_dir, timeout=0, inventory_id=constructed_inv.id)

        # Stand in for ansible: rewrite the fact file and push its timestamps
        # 60 seconds into the future.
        webserver_fact_path = os.path.join(artifacts_dir, 'fact_cache', 'webserver')
        future_stamp = time.time() + 60
        with open(webserver_fact_path, 'w') as fact_handle:
            json.dump({'role': 'web', 'deployed': True}, fact_handle)
        os.utime(webserver_fact_path, (future_stamp, future_stamp))

        finish_fact_cache(source_qs, artifacts_dir=artifacts_dir, inventory_id=constructed_inv.id)

        source_host.refresh_from_db()
        constructed_host.refresh_from_db()

        assert source_host.ansible_facts == {'role': 'web', 'deployed': True}
        assert not constructed_host.ansible_facts, f'Facts were stored on the constructed host: {constructed_host.ansible_facts!r}'
|
||||
@@ -29,3 +29,30 @@ def test_cancel_flag_on_start(jt_linked, caplog):
|
||||
|
||||
job = Job.objects.get(id=job.id)
|
||||
assert job.status == 'canceled'
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_runjob_run_can_accept_waiting_status(jt_linked, mocker):
    """RunJob.run() must accept a job that is in 'waiting' status and have it
    in 'running' by the time pre_run_hook is called."""
    job = jt_linked.create_unified_job()
    job.status = 'waiting'
    job.save()

    observed = {}

    def capture_status(instance, private_data_dir):
        # Re-read the row so the recorded status is what the database holds.
        instance.refresh_from_db()
        observed['status'] = instance.status

    pre_run_mock = mocker.patch.object(RunJob, 'pre_run_hook', side_effect=capture_status)

    runner = RunJob()
    try:
        runner.run(job.id)
    except Exception:
        # Any exception raised by run() is deliberately ignored; the
        # assertions below only look at what the patched pre_run_hook saw.
        pass

    pre_run_mock.assert_called_once()
    assert observed.get('status') == 'running'
|
||||
|
||||
@@ -9,7 +9,7 @@ from unittest import mock
|
||||
import pytest
|
||||
|
||||
from awx.main.tasks.system import CleanupImagesAndFiles, execution_node_health_check, inspect_established_receptor_connections, clear_setting_cache
|
||||
from awx.main.management.commands.run_dispatcher import Command
|
||||
from awx.main.management.commands.dispatcherd import Command
|
||||
from awx.main.models import Instance, Job, ReceptorAddress, InstanceLink
|
||||
|
||||
|
||||
|
||||
@@ -74,47 +74,64 @@ GLqbpJyX2r3p/Rmo6mLY71SqpA==
|
||||
|
||||
@pytest.mark.django_db
def test_default_cred_types():
    """Check the set of default credential types.

    The keys of CredentialType.defaults must match the expected list exactly
    (order is ignored), the two OIDC hashivault types must be absent, and
    every default type must instantiate with ``managed`` set to True.
    """
    expected = [
        'aim',
        'aws',
        'aws_secretsmanager_credential',
        'azure_kv',
        'azure_rm',
        'bitbucket_dc_token',
        'centrify_vault_kv',
        'conjur',
        'controller',
        'galaxy_api_token',
        'gce',
        'github_token',
        'github_app_lookup',
        'gitlab_token',
        'gpg_public_key',
        'hashivault_kv',
        'hashivault_ssh',
        'hcp_terraform',
        'insights',
        'kubernetes_bearer_token',
        'net',
        'openstack',
        'registry',
        'rhv',
        'satellite6',
        'scm',
        'ssh',
        'terraform',
        'thycotic_dsv',
        'thycotic_tss',
        'vault',
        'vmware',
    ]
    # Single comparison against the named list; the same list used to be
    # asserted a second time inline, which added nothing.
    assert sorted(CredentialType.defaults.keys()) == sorted(expected)
    assert 'hashivault-kv-oidc' not in CredentialType.defaults
    assert 'hashivault-ssh-oidc' not in CredentialType.defaults

    for type_ in CredentialType.defaults.values():
        assert type_().managed is True
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_default_cred_types_with_oidc_enabled():
    """With FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED overridden to True, reloading
    the credential registry must expose both OIDC hashivault types."""
    from django.test import override_settings
    from awx.main.models.credential import load_credentials, ManagedCredentialType

    # Keep a copy so the registry can be put back whatever happens below.
    registry_snapshot = ManagedCredentialType.registry.copy()
    try:
        with override_settings(FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED=True):
            ManagedCredentialType.registry.clear()
            load_credentials()
            for oidc_type in ('hashivault-kv-oidc', 'hashivault-ssh-oidc'):
                assert oidc_type in CredentialType.defaults
    finally:
        ManagedCredentialType.registry = registry_snapshot
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_credential_creation(organization_factory):
|
||||
org = organization_factory('test').organization
|
||||
|
||||
@@ -1,20 +1,12 @@
|
||||
import datetime
|
||||
import multiprocessing
|
||||
import random
|
||||
import signal
|
||||
import time
|
||||
import yaml
|
||||
from unittest import mock
|
||||
from flags.state import disable_flag, enable_flag
|
||||
from django.utils.timezone import now as tz_now
|
||||
import pytest
|
||||
|
||||
from awx.main.models import Job, WorkflowJob, Instance
|
||||
from awx.main.dispatch import reaper
|
||||
from awx.main.dispatch.pool import StatefulPoolWorker, WorkerPool, AutoscalePool
|
||||
from awx.main.dispatch.publish import task
|
||||
from awx.main.dispatch.worker import BaseWorker, TaskWorker
|
||||
from awx.main.dispatch.periodic import Scheduler
|
||||
from awx.main.tasks import system
|
||||
from dispatcherd.publish import task
|
||||
|
||||
'''
|
||||
Prevent logger.<warn, debug, error> calls from triggering database operations
|
||||
@@ -57,294 +49,6 @@ def multiply(a, b):
|
||||
return a * b
|
||||
|
||||
|
||||
class SimpleWorker(BaseWorker):
    """Worker whose perform_work accepts a message and does nothing with it."""

    def perform_work(self, body, *args):
        """Ignore the message body and any extra arguments."""
        return None
|
||||
|
||||
|
||||
class ResultWriter(BaseWorker):
    """Worker that puts each message, with '!!!' appended, onto the result queue."""

    def perform_work(self, body, result_queue):
        """Append '!!!' to ``body`` and put the result on ``result_queue``."""
        reply = body + '!!!'
        result_queue.put(reply)
|
||||
|
||||
|
||||
class SlowResultWriter(BaseWorker):
    """Worker that sleeps three seconds before delegating to the parent's perform_work."""

    def perform_work(self, body, result_queue):
        """Sleep for 3 seconds, then call the superclass perform_work with the same arguments."""
        delay_seconds = 3
        time.sleep(delay_seconds)
        super().perform_work(body, result_queue)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("disable_database_settings")
class TestPoolWorker:
    """Bookkeeping tests for a single StatefulPoolWorker, driven in-process."""

    def setup_method(self, test_method):
        """Build a fresh worker whose target is this test's own ``tick``."""
        self.worker = StatefulPoolWorker(1000, self.tick, tuple())

    def tick(self):
        """Take one message off the worker queue and report its 'uuid' on the
        finished queue, then pause for half a second."""
        message = self.worker.queue.get()
        self.worker.finished.put(message['uuid'])
        time.sleep(0.5)

    def test_qsize(self):
        """qsize goes from 0 to the number of messages put."""
        assert self.worker.qsize == 0
        for _ in range(3):
            self.worker.put({'task': 'abc123'})
        assert self.worker.qsize == 3

    def test_put(self):
        """A put registers one managed task and one sent message."""
        assert len(self.worker.managed_tasks) == 0
        assert self.worker.messages_finished == 0
        self.worker.put({'task': 'abc123'})

        assert len(self.worker.managed_tasks) == 1
        assert self.worker.messages_sent == 1

    def test_managed_tasks(self):
        """A task stays managed until it has been ticked through."""
        self.worker.put({'task': 'abc123'})
        self.worker.calculate_managed_tasks()
        assert len(self.worker.managed_tasks) == 1

        self.tick()
        self.worker.calculate_managed_tasks()
        assert len(self.worker.managed_tasks) == 0

    def test_current_task(self):
        """current_task reflects the message that was just put."""
        self.worker.put({'task': 'abc123'})
        current = self.worker.current_task
        assert current['task'] == 'abc123'

    def test_quit(self):
        """quit() places the 'QUIT' sentinel on the worker queue."""
        self.worker.quit()
        sentinel = self.worker.queue.get()
        assert sentinel == 'QUIT'

    def test_idle_busy(self):
        """idle and busy flip once the worker has been given a message."""
        assert self.worker.idle is True
        assert self.worker.busy is False
        self.worker.put({'task': 'abc123'})
        assert self.worker.busy is True
        assert self.worker.idle is False
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestWorkerPool:
    """Tests for a fixed-size WorkerPool with three workers."""

    def setup_method(self, test_method):
        """Create the pool; each test initializes the workers itself."""
        self.pool = WorkerPool(min_workers=3)

    def teardown_method(self, test_method):
        """Stop the pool with SIGTERM after every test."""
        self.pool.stop(signal.SIGTERM)

    def test_worker(self):
        """After init, three live workers exist and none has been sent a message."""
        self.pool.init_workers(SimpleWorker().work_loop)
        assert len(self.pool) == 3
        for pool_worker in self.pool.workers:
            assert pool_worker.messages_sent == 0
            assert pool_worker.alive is True

    def test_single_task(self):
        """Writing with preferred queue 0 lands the message on worker 0 only."""
        self.pool.init_workers(SimpleWorker().work_loop)
        self.pool.write(0, 'xyz')
        first, second, third = (self.pool.workers[idx] for idx in range(3))
        assert first.messages_sent == 1  # worker at index 0 handled one task
        assert second.messages_sent == 0
        assert third.messages_sent == 0

    def test_queue_preference(self):
        """Writing with preferred queue 2 lands the message on worker 2 only."""
        self.pool.init_workers(SimpleWorker().work_loop)
        self.pool.write(2, 'xyz')
        first, second, third = (self.pool.workers[idx] for idx in range(3))
        assert first.messages_sent == 0
        assert second.messages_sent == 0
        assert third.messages_sent == 1  # worker at index 2 handled one task

    def test_worker_processing(self):
        """Ten messages spread over random workers all come back with '!!!' appended."""
        message_count = 10
        result_queue = multiprocessing.Queue()
        self.pool.init_workers(ResultWriter().work_loop, result_queue)
        for number in range(message_count):
            preferred = random.choice(range(len(self.pool)))
            self.pool.write(preferred, f'Hello, Worker {number}')
        received = sorted(result_queue.get(timeout=1) for _ in range(message_count))
        assert received == [f'Hello, Worker {number}!!!' for number in range(message_count)]

        total_handled = sum(pool_worker.messages_sent for pool_worker in self.pool.workers)
        assert total_handled == message_count
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestAutoScaling:
    """Tests for AutoscalePool growing and shrinking between min_workers=2 and max_workers=10."""

    def setup_method(self, test_method):
        """Create the autoscaling pool; each test initializes the workers itself."""
        self.pool = AutoscalePool(min_workers=2, max_workers=10)

    def teardown_method(self, test_method):
        """Stop the pool with SIGTERM after every test."""
        self.pool.stop(signal.SIGTERM)

    def test_scale_up(self):
        """A write while every worker is busy spawns an additional worker."""
        result_queue = multiprocessing.Queue()
        self.pool.init_workers(SlowResultWriter().work_loop, result_queue)

        # start with two workers, write an event to each worker and make it busy
        assert len(self.pool) == 2
        for w in self.pool.workers:
            w.put('Hello, Worker 0')
        assert len(self.pool) == 2

        # wait for the subprocesses to start working on their tasks and be marked busy
        time.sleep(1)
        assert self.pool.should_grow

        # write a third message, expect a new worker to spawn because all
        # workers are busy
        self.pool.write(0, 'Hello, Worker 2')
        assert len(self.pool) == 3

    def test_scale_down(self):
        """cleanup() shrinks a fully scaled-up pool back to two workers."""
        self.pool.init_workers(ResultWriter().work_loop, multiprocessing.Queue())

        # start with two workers, and scale up to 10 workers
        assert len(self.pool) == 2
        for _ in range(8):
            self.pool.up()
        assert len(self.pool) == 10

        # cleanup should scale back down to the two workers we started with
        self.pool.cleanup()
        assert len(self.pool) == 2

    def test_max_scale_up(self):
        """Repeated up() calls never take the pool past max_workers."""
        self.pool.init_workers(ResultWriter().work_loop, multiprocessing.Queue())

        assert len(self.pool) == 2
        for _ in range(25):
            self.pool.up()
        assert self.pool.max_workers == 10
        assert self.pool.full is True
        assert len(self.pool) == 10

    def test_equal_worker_distribution(self):
        """New workers are spawned before any existing worker gets a backlog."""
        # if all workers are busy, spawn new workers *before* adding messages
        # to an existing queue
        # Pass a queue instance (not the Queue class), as the other tests in
        # this class do.
        self.pool.init_workers(SlowResultWriter().work_loop, multiprocessing.Queue())

        # start with two workers, write an event to each worker and make it busy
        assert len(self.pool) == 2
        for _ in range(10):
            self.pool.write(0, 'Hello, World!')
        assert len(self.pool) == 10
        for w in self.pool.workers:
            assert w.busy
            assert len(w.managed_tasks) == 1

        # the pool is full at 10, the _next_ write should put the message into
        # a worker's backlog
        assert len(self.pool) == 10
        for w in self.pool.workers:
            assert w.messages_sent == 1
        self.pool.write(0, 'Hello, World!')
        assert len(self.pool) == 10
        assert self.pool.workers[0].messages_sent == 2

    @pytest.mark.timeout(20)
    def test_lost_worker_autoscale(self):
        """A killed worker is dropped by cleanup() and replaced on the next write."""
        # if a worker exits, it should be replaced automatically up to min_workers
        self.pool.init_workers(ResultWriter().work_loop, multiprocessing.Queue())

        # start with two workers, kill one of them
        assert len(self.pool) == 2
        assert not self.pool.should_grow
        alive_pid = self.pool.workers[1].pid
        self.pool.workers[0].process.kill()
        self.pool.workers[0].process.join()  # waits for the process to fully terminate

        # clean up the dead worker
        self.pool.cleanup()
        assert len(self.pool) == 1
        assert self.pool.workers[0].pid == alive_pid

        # the next queue write should replace the lost worker
        self.pool.write(0, 'Hello, Worker')
        assert len(self.pool) == 2
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("disable_database_settings")
class TestTaskDispatcher:
    """Tests for how TaskWorker.perform_work resolves and runs the named task."""

    @property
    def tm(self):
        """Return a new TaskWorker on every access."""
        return TaskWorker()

    def test_function_dispatch(self):
        """A decorated function is called with the given args."""
        message = {'task': 'awx.main.tests.functional.test_dispatch.add', 'args': [2, 2]}
        assert self.tm.perform_work(message) == 4

    def test_function_dispatch_must_be_decorated(self):
        """An undecorated function yields a ValueError as the result."""
        message = {'task': 'awx.main.tests.functional.test_dispatch.restricted', 'args': [2, 2]}
        outcome = self.tm.perform_work(message)
        assert isinstance(outcome, ValueError)
        assert str(outcome) == 'awx.main.tests.functional.test_dispatch.restricted is not decorated with @task()'  # noqa

    def test_method_dispatch(self):
        """A decorated class is dispatched with the given args."""
        message = {'task': 'awx.main.tests.functional.test_dispatch.Adder', 'args': [2, 2]}
        assert self.tm.perform_work(message) == 4

    def test_method_dispatch_must_be_decorated(self):
        """An undecorated class yields a ValueError as the result."""
        message = {'task': 'awx.main.tests.functional.test_dispatch.Restricted', 'args': [2, 2]}
        outcome = self.tm.perform_work(message)
        assert isinstance(outcome, ValueError)
        assert str(outcome) == 'awx.main.tests.functional.test_dispatch.Restricted is not decorated with @task()'  # noqa

    def test_python_function_cannot_be_imported(self):
        """A callable outside awx (os.system) is rejected as not a valid awx task."""
        message = {
            'task': 'os.system',
            'args': ['ls'],
        }
        outcome = self.tm.perform_work(message)
        assert isinstance(outcome, ValueError)
        assert str(outcome) == 'os.system is not a valid awx task'  # noqa

    def test_undefined_function_cannot_be_imported(self):
        """A task path in a module that does not exist yields ModuleNotFoundError."""
        outcome = self.tm.perform_work({'task': 'awx.foo.bar'})
        assert isinstance(outcome, ModuleNotFoundError)
        assert str(outcome) == "No module named 'awx.foo'"  # noqa
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestTaskPublisher:
    """Tests for calling decorated tasks directly and through apply_async."""

    @pytest.fixture(autouse=True)
    def _disable_dispatcherd(self):
        """Disable FEATURE_DISPATCHERD_ENABLED for each test and enable it afterwards."""
        flag_name = "FEATURE_DISPATCHERD_ENABLED"
        disable_flag(flag_name)
        yield
        enable_flag(flag_name)

    def test_function_callable(self):
        """A decorated function is still directly callable."""
        assert add(2, 2) == 4

    def test_method_callable(self):
        """A decorated class can be instantiated and run directly."""
        adder = Adder()
        assert adder.run(2, 2) == 4

    def test_function_apply_async(self):
        """apply_async on a function returns the message and the chosen queue."""
        message, queue = add.apply_async([2, 2], queue='foobar')
        assert message['args'] == [2, 2]
        assert message['kwargs'] == {}
        assert message['task'] == 'awx.main.tests.functional.test_dispatch.add'
        assert queue == 'foobar'

    def test_method_apply_async(self):
        """apply_async on a class returns the message and the chosen queue."""
        message, queue = Adder.apply_async([2, 2], queue='foobar')
        assert message['args'] == [2, 2]
        assert message['kwargs'] == {}
        assert message['task'] == 'awx.main.tests.functional.test_dispatch.Adder'
        assert queue == 'foobar'

    def test_apply_async_queue_required(self):
        """Without a queue, apply_async raises ValueError naming the task."""
        with pytest.raises(ValueError) as raised:
            message, queue = add.apply_async([2, 2])
        expected_text = "awx.main.tests.functional.test_dispatch.add: Queue value required and may not be None"
        assert expected_text == raised.value.args[0]

    def test_queue_defined_in_task_decorator(self):
        """When no queue is passed, apply_async on ``multiply`` returns 'hard-math'."""
        _message, queue = multiply.apply_async([2, 2])
        assert queue == 'hard-math'

    def test_queue_overridden_from_task_decorator(self):
        """An explicit queue is returned for ``multiply`` instead of 'hard-math'."""
        _message, queue = multiply.apply_async([2, 2], queue='not-so-hard')
        assert queue == 'not-so-hard'

    def test_apply_with_callable_queuename(self):
        """A callable queue argument is called and its result is the queue name."""
        _message, queue = add.apply_async([2, 2], queue=lambda: 'called')
        assert queue == 'called'
|
||||
|
||||
|
||||
# Reference timestamps, each taken from its own tz_now() call at import time.
yesterday = tz_now() - datetime.timedelta(days=1)
# Despite the name, this is two minutes (120 seconds) in the past.
minute = tz_now() - datetime.timedelta(minutes=2)
now = tz_now()
|
||||
@@ -358,11 +62,6 @@ class TestJobReaper(object):
|
||||
('running', '', '', None, False), # running, not assigned to the instance
|
||||
('running', 'awx', '', None, True), # running, has the instance as its execution_node
|
||||
('running', '', 'awx', None, True), # running, has the instance as its controller_node
|
||||
('waiting', '', '', None, False), # waiting, not assigned to the instance
|
||||
('waiting', 'awx', '', None, False), # waiting, was edited less than a minute ago
|
||||
('waiting', '', 'awx', None, False), # waiting, was edited less than a minute ago
|
||||
('waiting', 'awx', '', yesterday, False), # waiting, managed by another node, ignore
|
||||
('waiting', '', 'awx', yesterday, True), # waiting, assigned to the controller_node, stale
|
||||
],
|
||||
)
|
||||
def test_should_reap(self, status, fail, execution_node, controller_node, modified):
|
||||
@@ -380,7 +79,6 @@ class TestJobReaper(object):
|
||||
# (because .save() overwrites it to _now_)
|
||||
Job.objects.filter(id=j.id).update(modified=modified)
|
||||
reaper.reap(i)
|
||||
reaper.reap_waiting(i)
|
||||
job = Job.objects.first()
|
||||
if fail:
|
||||
assert job.status == 'failed'
|
||||
@@ -389,6 +87,20 @@ class TestJobReaper(object):
|
||||
else:
|
||||
assert job.status == status
|
||||
|
||||
def test_waiting_job_sent_back_to_pending(self):
    """A waiting job tied to a lost instance goes back to pending with both node fields cleared."""
    local_node = Instance(hostname='awx')
    local_node.save()

    lost_node = Instance(hostname='lost', node_type=Instance.Types.EXECUTION, node_state=Instance.States.UNAVAILABLE)
    lost_node.save()

    job = Job.objects.create(status='waiting', controller_node=lost_node.hostname, execution_node='lost')

    # Hand the lost instance to the heartbeat handler, then re-read the job row.
    system._heartbeat_handle_lost_instances([lost_node], local_node)
    job.refresh_from_db()

    assert job.status == 'pending'
    assert (job.controller_node, job.execution_node) == ('', '')
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'excluded_uuids, fail, started',
|
||||
[
|
||||
@@ -448,76 +160,3 @@ class TestJobReaper(object):
|
||||
assert job.started > ref_time
|
||||
assert job.status == 'running'
|
||||
assert job.job_explanation == ''
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestScheduler:
    """Tests for the periodic ``Scheduler``: construction, offsets, and run bookkeeping."""

    # Dotted path of the clock that the tests replace with a fixed value.
    CLOCK = 'awx.main.dispatch.periodic.time.time'

    def test_too_many_schedules_freak_out(self):
        """Two jobs on a one-second period make the constructor raise RuntimeError."""
        one_second = datetime.timedelta(seconds=1)
        with pytest.raises(RuntimeError):
            Scheduler({'job1': {'schedule': one_second}, 'job2': {'schedule': one_second}})

    def test_spread_out(self):
        """Four jobs sharing a 16-second period get offsets 0, 4, 8 and 12."""
        period = datetime.timedelta(seconds=16)
        scheduler = Scheduler({f'job{number}': {'schedule': period} for number in range(1, 5)})
        offsets = [job.offset for job in scheduler.jobs]
        assert offsets == [0, 4, 8, 12]

    def test_missed_schedule(self, mocker):
        """Nothing is missed right after construction; more than one run is missed 50 seconds later."""
        scheduler = Scheduler({'job1': {'schedule': datetime.timedelta(seconds=10)}})
        elapsed = time.time() - scheduler.global_start
        assert scheduler.jobs[0].missed_runs(elapsed) == 0

        # Freeze the clock 50 seconds after the scheduler's start.
        mocker.patch(self.CLOCK, return_value=scheduler.global_start + 50)
        scheduler.get_and_mark_pending()
        assert scheduler.jobs[0].missed_runs(50) > 1

    def test_advance_schedule(self, mocker):
        """The 20-second jobs are due at t=29; only the 30-second job is due at t=39."""
        twenty = datetime.timedelta(seconds=20)
        scheduler = Scheduler(
            {
                'job1': {'schedule': datetime.timedelta(seconds=30)},
                'joba': {'schedule': twenty},
                'jobb': {'schedule': twenty},
            }
        )
        # HACK: automatically assigned offsets make the timings hard to reason about, so zero them.
        for job in scheduler.jobs:
            job.offset = 0.0

        mocker.patch(self.CLOCK, return_value=scheduler.global_start + 29)
        due = scheduler.get_and_mark_pending()
        assert {job.name for job in due} == {'joba', 'jobb'}

        mocker.patch(self.CLOCK, return_value=scheduler.global_start + 39)
        due = scheduler.get_and_mark_pending()
        assert len(due) == 1
        assert due[0].name == 'job1'

    @staticmethod
    def get_job(scheduler, name):
        """Return the first job in ``scheduler.jobs`` named ``name``, or None when there is none."""
        return next((job for job in scheduler.jobs if job.name == name), None)

    def test_scheduler_debug(self, mocker):
        """Check the per-job figures reported by ``Scheduler.debug()`` after marking some runs."""
        scheduler = Scheduler(
            {
                'joba': {'schedule': datetime.timedelta(seconds=20)},
                'jobb': {'schedule': datetime.timedelta(seconds=50)},
                'jobc': {'schedule': datetime.timedelta(seconds=500)},
                'jobd': {'schedule': datetime.timedelta(seconds=20)},
            }
        )
        rel_time = 119.9  # slightly under the 6th 20-second bin, to avoid offset problems
        frozen_now = scheduler.global_start + rel_time
        mocker.patch(self.CLOCK, return_value=frozen_now - 1.0e-8)

        self.get_job(scheduler, 'jobb').mark_run(rel_time)
        self.get_job(scheduler, 'jobd').mark_run(rel_time - 20.0)

        # debug() output is parsed as YAML and inspected per job name.
        report = yaml.safe_load(scheduler.debug())
        assert report['schedule_list']['jobc']['last_run_seconds_ago'] is None
        assert report['schedule_list']['joba']['missed_runs'] == 4
        assert report['schedule_list']['jobd']['missed_runs'] == 3
        assert report['schedule_list']['jobd']['completed_runs'] == 1
        assert report['schedule_list']['jobb']['next_run_in_seconds'] > 25.0
|
||||
|
||||
@@ -8,6 +8,7 @@ from awx.main.models import (
|
||||
Instance,
|
||||
Host,
|
||||
JobHostSummary,
|
||||
Inventory,
|
||||
InventoryUpdate,
|
||||
InventorySource,
|
||||
Project,
|
||||
@@ -17,14 +18,60 @@ from awx.main.models import (
|
||||
InstanceGroup,
|
||||
Label,
|
||||
ExecutionEnvironment,
|
||||
Credential,
|
||||
CredentialType,
|
||||
CredentialInputSource,
|
||||
Organization,
|
||||
JobTemplate,
|
||||
)
|
||||
from awx.main.tasks import jobs
|
||||
from awx.main.tasks.system import cluster_node_heartbeat
|
||||
from awx.main.utils.db import bulk_update_sorted_by_id
|
||||
from ansible_base.lib.testing.util import feature_flag_enabled, feature_flag_disabled
|
||||
|
||||
from django.db import OperationalError
|
||||
from django.test.utils import override_settings
|
||||
|
||||
|
||||
@pytest.fixture
def job_template_with_credentials():
    """
    Factory fixture: returns a callable that builds a job template and launches a job from it.

    Usage:
        job = job_template_with_credentials(ssh_cred, vault_cred)
    """

    def _build_job(*credentials, org_name='test-org', project_name='test-project', inventory_name='test-inventory', jt_name='test-jt', playbook='test.yml'):
        """
        Create an organization, project, inventory and job template, then a job from the template.

        Args:
            *credentials: Credential objects to attach to the job template (may be empty)
            org_name: Name given to the created organization
            project_name: Name given to the created project
            inventory_name: Name given to the created inventory
            jt_name: Name given to the created job template
            playbook: Playbook filename stored on the job template

        Returns:
            The result of ``create_unified_job()`` on the new job template
        """
        organization = Organization.objects.create(name=org_name)
        project = Project.objects.create(name=project_name, organization=organization)
        inventory = Inventory.objects.create(name=inventory_name, organization=organization)
        template = JobTemplate.objects.create(name=jt_name, project=project, inventory=inventory, playbook=playbook)

        # Only touch the relation when at least one credential was passed.
        if len(credentials) > 0:
            template.credentials.add(*credentials)

        return template.create_unified_job()

    return _build_job
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_orphan_unified_job_creation(instance, inventory):
|
||||
job = Job.objects.create(job_template=None, inventory=inventory, name='hi world')
|
||||
@@ -50,7 +97,7 @@ def test_job_capacity_and_with_inactive_node():
|
||||
i.save()
|
||||
with override_settings(CLUSTER_HOST_ID=i.hostname):
|
||||
with mock.patch.object(redis.client.Redis, 'ping', lambda self: True):
|
||||
cluster_node_heartbeat()
|
||||
cluster_node_heartbeat(None)
|
||||
i = Instance.objects.get(id=i.id)
|
||||
assert i.capacity == 0
|
||||
|
||||
@@ -262,3 +309,442 @@ class TestLaunchConfig:
|
||||
assert config.execution_environment
|
||||
# We just write the PK instead of trying to assign an item, that happens on the save
|
||||
assert config.execution_environment_id == ee.id
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_base_task_credentials_property(job_template_with_credentials):
    """Two reads of ``_credentials`` must hand back the very same list object."""
    task = jobs.RunJob()

    def _saved_type(key):
        # Instantiate one of the built-in credential type definitions and persist it.
        cred_type = CredentialType.defaults[key]()
        cred_type.save()
        return cred_type

    ssh_type = _saved_type('ssh')
    vault_type = _saved_type('vault')

    ssh_cred = Credential.objects.create(credential_type=ssh_type, name='ssh-cred')
    vault_cred = Credential.objects.create(credential_type=vault_type, name='vault-cred')

    # The job carries both credentials through its template.
    task.instance = job_template_with_credentials(ssh_cred, vault_cred)

    first = task._credentials
    assert isinstance(first, list)
    assert len(first) == 2

    # Identity (not just equality) shows the second read did not rebuild the list.
    second = task._credentials
    assert second is first
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_run_job_machine_credential(job_template_with_credentials):
    """``_machine_credential`` picks the ssh credential out of the seeded credential list."""
    task = jobs.RunJob()

    def _saved_type(key):
        # Instantiate one of the built-in credential type definitions and persist it.
        cred_type = CredentialType.defaults[key]()
        cred_type.save()
        return cred_type

    ssh_type = _saved_type('ssh')
    vault_type = _saved_type('vault')

    ssh_cred = Credential.objects.create(credential_type=ssh_type, name='ssh-cred')
    vault_cred = Credential.objects.create(credential_type=vault_type, name='vault-cred')

    task.instance = job_template_with_credentials(ssh_cred, vault_cred)

    # Seed the credential list directly so the lookup works on these exact objects.
    task._credentials = [ssh_cred, vault_cred]

    machine = task._machine_credential
    assert machine == ssh_cred
    assert machine.credential_type.kind == 'ssh'
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_run_job_machine_credential_none(job_template_with_credentials):
    """``_machine_credential`` is None when the seeded list holds no ssh credential."""
    task = jobs.RunJob()

    # A vault credential is the only credential in play.
    vault_type = CredentialType.defaults['vault']()
    vault_type.save()
    only_cred = Credential.objects.create(credential_type=vault_type, name='vault-cred')

    task.instance = job_template_with_credentials(only_cred)

    # Seed the credential list directly.
    task._credentials = [only_cred]

    assert task._machine_credential is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_run_job_vault_credentials(job_template_with_credentials):
    """``_vault_credentials`` yields both vault credentials and leaves the ssh one out."""
    task = jobs.RunJob()

    def _saved_type(key):
        # Instantiate one of the built-in credential type definitions and persist it.
        cred_type = CredentialType.defaults[key]()
        cred_type.save()
        return cred_type

    vault_type = _saved_type('vault')
    ssh_type = _saved_type('ssh')

    first_vault = Credential.objects.create(credential_type=vault_type, name='vault-1')
    second_vault = Credential.objects.create(credential_type=vault_type, name='vault-2')
    ssh_cred = Credential.objects.create(credential_type=ssh_type, name='ssh-cred')

    # The ssh credential sits between the two vault credentials on purpose.
    mixed = [first_vault, ssh_cred, second_vault]
    task.instance = job_template_with_credentials(*mixed)
    task._credentials = mixed

    vaults = task._vault_credentials
    assert len(vaults) == 2
    assert first_vault in vaults
    assert second_vault in vaults
    assert ssh_cred not in vaults
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_run_job_network_credentials(job_template_with_credentials):
    """``_network_credentials`` yields exactly the one 'net' credential from the seeded list."""
    task = jobs.RunJob()

    def _saved_type(key):
        # Instantiate one of the built-in credential type definitions and persist it.
        cred_type = CredentialType.defaults[key]()
        cred_type.save()
        return cred_type

    net_type = _saved_type('net')
    ssh_type = _saved_type('ssh')

    net_cred = Credential.objects.create(credential_type=net_type, name='net-cred')
    ssh_cred = Credential.objects.create(credential_type=ssh_type, name='ssh-cred')

    task.instance = job_template_with_credentials(net_cred, ssh_cred)

    # Seed the credential list directly.
    task._credentials = [net_cred, ssh_cred]

    network = task._network_credentials
    assert len(network) == 1
    assert network[0] == net_cred
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_run_job_cloud_credentials(job_template_with_credentials):
    """``_cloud_credentials`` yields exactly the one aws credential from the seeded list."""
    task = jobs.RunJob()

    def _saved_type(key):
        # Instantiate one of the built-in credential type definitions and persist it.
        cred_type = CredentialType.defaults[key]()
        cred_type.save()
        return cred_type

    aws_type = _saved_type('aws')
    ssh_type = _saved_type('ssh')

    aws_cred = Credential.objects.create(credential_type=aws_type, name='aws-cred')
    ssh_cred = Credential.objects.create(credential_type=ssh_type, name='ssh-cred')

    task.instance = job_template_with_credentials(aws_cred, ssh_cred)

    # Seed the credential list directly.
    task._credentials = [aws_cred, ssh_cred]

    cloud = task._cloud_credentials
    assert len(cloud) == 1
    assert cloud[0] == aws_cred
|
||||
|
||||
|
||||
@pytest.mark.django_db
@override_settings(RESOURCE_SERVER={'URL': 'https://gateway.example.com', 'SECRET_KEY': 'test-secret-key', 'VALIDATE_HTTPS': False})
def test_populate_workload_identity_tokens_with_flag_enabled(job_template_with_credentials, mocker):
    """With the feature flag on, the fetched JWT is stored in the target credential's context."""
    with feature_flag_enabled('FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED'):
        task = jobs.RunJob()

        ssh_type = CredentialType.defaults['ssh']()
        ssh_type.save()

        # A custom credential type that exposes a workload identity token field.
        oidc_fields = [
            {'id': 'jwt_aud', 'type': 'string', 'label': 'JWT Audience'},
            {'id': 'workload_identity_token', 'type': 'string', 'label': 'Workload Identity Token', 'secret': True, 'internal': True},
        ]
        hashivault_type = CredentialType(
            name='HashiCorp Vault Secret Lookup (OIDC)',
            kind='cloud',
            managed=False,
            inputs={'fields': oidc_fields},
        )
        hashivault_type.save()

        ssh_cred = Credential.objects.create(credential_type=ssh_type, name='ssh-cred')
        source_cred = Credential.objects.create(credential_type=hashivault_type, name='vault-source', inputs={'jwt_aud': 'https://vault.example.com'})
        target_cred = Credential.objects.create(credential_type=ssh_type, name='target-cred', inputs={'username': 'testuser'})

        # The target's password field is sourced from the OIDC-capable credential.
        input_source = CredentialInputSource.objects.create(
            target_credential=target_cred,
            source_credential=source_cred,
            input_field_name='password',
            metadata={'path': 'secret/data/password'},
        )

        task.instance = job_template_with_credentials(target_cred, ssh_cred)

        # Seed the credential list so the task works on these exact Python objects.
        task._credentials = [target_cred, ssh_cred]

        # Only the outgoing HTTP request is faked; it answers with a JWT payload.
        fake_response = mocker.Mock(status_code=200)
        fake_response.json.return_value = {'jwt': 'eyJ.test.jwt'}
        mock_request = mocker.patch('requests.request', return_value=fake_response, autospec=True)

        task.populate_workload_identity_tokens()

        # Exactly one POST went to the workload identity token endpoint.
        mock_request.assert_called_once()
        sent = mock_request.call_args.kwargs
        assert sent['method'] == 'POST'
        assert '/api/gateway/v1/workload_identity_tokens' in sent['url']

        # The token is stored in the credential context under the input source's primary key.
        assert input_source.pk in target_cred.context
        assert target_cred.context[input_source.pk]['workload_identity_token'] == 'eyJ.test.jwt'
|
||||
|
||||
|
||||
@pytest.mark.django_db
@override_settings(RESOURCE_SERVER={'URL': 'https://gateway.example.com', 'SECRET_KEY': 'test-secret-key', 'VALIDATE_HTTPS': False})
def test_populate_workload_identity_tokens_passes_workload_ttl_from_job_timeout(job_template_with_credentials, mocker):
    """The token request body carries ``workload_ttl_seconds`` equal to the job's timeout."""
    with feature_flag_enabled('FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED'):
        task = jobs.RunJob()

        ssh_type = CredentialType.defaults['ssh']()
        ssh_type.save()

        # A custom credential type that exposes a workload identity token field.
        oidc_fields = [
            {'id': 'jwt_aud', 'type': 'string', 'label': 'JWT Audience'},
            {'id': 'workload_identity_token', 'type': 'string', 'label': 'Workload Identity Token', 'secret': True, 'internal': True},
        ]
        hashivault_type = CredentialType(
            name='HashiCorp Vault Secret Lookup (OIDC)',
            kind='cloud',
            managed=False,
            inputs={'fields': oidc_fields},
        )
        hashivault_type.save()

        ssh_cred = Credential.objects.create(credential_type=ssh_type, name='ssh-cred')
        source_cred = Credential.objects.create(credential_type=hashivault_type, name='vault-source', inputs={'jwt_aud': 'https://vault.example.com'})
        target_cred = Credential.objects.create(credential_type=ssh_type, name='target-cred', inputs={'username': 'testuser'})

        CredentialInputSource.objects.create(
            target_credential=target_cred,
            source_credential=source_cred,
            input_field_name='password',
            metadata={'path': 'secret/data/password'},
        )

        # Give the job an explicit one-hour timeout and persist it.
        job = job_template_with_credentials(target_cred, ssh_cred)
        job.timeout = 3600
        job.save()

        task.instance = job
        task._credentials = [target_cred, ssh_cred]

        fake_response = mocker.Mock(status_code=200)
        fake_response.json.return_value = {'jwt': 'eyJ.test.jwt'}
        mock_request = mocker.patch('requests.request', return_value=fake_response, autospec=True)

        task.populate_workload_identity_tokens()

        sent = mock_request.call_args.kwargs
        assert sent['method'] == 'POST'
        # A missing JSON body is treated as empty, so the lookup yields None instead of raising.
        body = sent.get('json', {})
        assert body.get('workload_ttl_seconds') == 3600
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_populate_workload_identity_tokens_with_flag_disabled(job_template_with_credentials):
    """With the feature flag off, the job ends in 'error' with an explanation naming the flag and source."""
    with feature_flag_disabled('FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED'):
        task = jobs.RunJob()

        ssh_type = CredentialType.defaults['ssh']()
        ssh_type.save()

        # A custom credential type that exposes a workload identity token field.
        oidc_fields = [
            {'id': 'jwt_aud', 'type': 'string', 'label': 'JWT Audience'},
            {'id': 'workload_identity_token', 'type': 'string', 'label': 'Workload Identity Token', 'secret': True, 'internal': True},
        ]
        hashivault_type = CredentialType(
            name='HashiCorp Vault Secret Lookup (OIDC)',
            kind='cloud',
            managed=False,
            inputs={'fields': oidc_fields},
        )
        hashivault_type.save()

        source_cred = Credential.objects.create(credential_type=hashivault_type, name='vault-source')
        target_cred = Credential.objects.create(credential_type=ssh_type, name='target-cred', inputs={'username': 'testuser'})

        # This link between target and source is what brings the feature flag into play.
        CredentialInputSource.objects.create(
            target_credential=target_cred,
            source_credential=source_cred,
            input_field_name='password',
            metadata={'path': 'secret/data/password'},
        )

        job = job_template_with_credentials(target_cred)
        task.instance = job

        # Seed the credential list directly.
        task._credentials = [target_cred]

        task.populate_workload_identity_tokens()

        # Re-read the row: the status and explanation are checked as persisted.
        job.refresh_from_db()
        assert job.status == 'error'
        explanation = job.job_explanation
        assert 'FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED' in explanation
        assert 'vault-source' in explanation
|
||||
|
||||
|
||||
@pytest.mark.django_db
@override_settings(RESOURCE_SERVER={'URL': 'https://gateway.example.com', 'SECRET_KEY': 'test-secret-key', 'VALIDATE_HTTPS': False})
def test_populate_workload_identity_tokens_multiple_input_sources_per_credential(job_template_with_credentials, mocker):
    """One credential fed by two workload identity sources gets one JWT per input source,
    each stored in the credential context under that input source's primary key."""
    with feature_flag_enabled('FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED'):
        task = jobs.RunJob()

        ssh_type = CredentialType.defaults['ssh']()
        ssh_type.save()

        def _oidc_type(name):
            # Build and persist a credential type exposing a workload identity token field.
            cred_type = CredentialType(
                name=name,
                kind='cloud',
                managed=False,
                inputs={
                    'fields': [
                        {'id': 'jwt_aud', 'type': 'string', 'label': 'JWT Audience'},
                        {'id': 'workload_identity_token', 'type': 'string', 'label': 'Workload Identity Token', 'secret': True, 'internal': True},
                    ]
                },
            )
            cred_type.save()
            return cred_type

        hashivault_kv_type = _oidc_type('HashiCorp Vault Secret Lookup (OIDC)')
        hashivault_ssh_type = _oidc_type('HashiCorp Vault Signed SSH (OIDC)')

        # Each source credential advertises its own audience.
        kv_audience = 'https://vault-kv.example.com'
        ssh_audience = 'https://vault-ssh.example.com'
        source_cred_kv = Credential.objects.create(credential_type=hashivault_kv_type, name='vault-kv-source', inputs={'jwt_aud': kv_audience})
        source_cred_ssh = Credential.objects.create(credential_type=hashivault_ssh_type, name='vault-ssh-source', inputs={'jwt_aud': ssh_audience})

        target_cred = Credential.objects.create(credential_type=ssh_type, name='target-cred', inputs={'username': 'testuser'})

        # Two fields of the same target credential, each sourced from a different credential.
        input_source_password = CredentialInputSource.objects.create(
            target_credential=target_cred,
            source_credential=source_cred_kv,
            input_field_name='password',
            metadata={'path': 'secret/data/password'},
        )
        input_source_ssh_key = CredentialInputSource.objects.create(
            target_credential=target_cred,
            source_credential=source_cred_ssh,
            input_field_name='ssh_key_data',
            metadata={'path': 'secret/data/ssh_key'},
        )

        task.instance = job_template_with_credentials(target_cred)

        # Seed the credential list so the task works on this exact Python object.
        task._credentials = [target_cred]

        # Successive HTTP calls answer with different JWTs, in this order.
        response_kv = mocker.Mock(status_code=200)
        response_kv.json.return_value = {'jwt': 'eyJ.kv.jwt'}
        response_ssh = mocker.Mock(status_code=200)
        response_ssh.json.return_value = {'jwt': 'eyJ.ssh.jwt'}
        mock_request = mocker.patch('requests.request', side_effect=[response_kv, response_ssh], autospec=True)

        task.populate_workload_identity_tokens()

        # One request per input source.
        assert mock_request.call_count == 2

        # Both audiences appear among the request bodies.
        audiences_requested = {call.kwargs.get('json', {}).get('audience', '') for call in mock_request.call_args_list}
        assert kv_audience in audiences_requested
        assert ssh_audience in audiences_requested

        # Each input source has its own token entry in the target credential's context.
        context = target_cred.context
        assert input_source_password.pk in context
        assert input_source_ssh_key.pk in context
        assert context[input_source_password.pk]['workload_identity_token'] == 'eyJ.kv.jwt'
        assert context[input_source_ssh_key.pk]['workload_identity_token'] == 'eyJ.ssh.jwt'
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_populate_workload_identity_tokens_without_workload_identity_credentials(job_template_with_credentials, mocker):
    """With only ordinary credentials attached, no credential ends up with a populated context."""
    with feature_flag_enabled('FEATURE_OIDC_WORKLOAD_IDENTITY_ENABLED'):
        task = jobs.RunJob()

        def _saved_type(key):
            # Instantiate one of the built-in credential type definitions and persist it.
            cred_type = CredentialType.defaults[key]()
            cred_type.save()
            return cred_type

        # Plain ssh and vault credentials only; no input sources are created.
        ssh_type = _saved_type('ssh')
        vault_type = _saved_type('vault')

        ssh_cred = Credential.objects.create(credential_type=ssh_type, name='ssh-cred')
        vault_cred = Credential.objects.create(credential_type=vault_type, name='vault-cred')

        task.instance = job_template_with_credentials(ssh_cred, vault_cred)

        # Seed the credential list directly.
        task._credentials = [ssh_cred, vault_cred]

        mocker.patch('awx.main.tasks.jobs.populate_claims_for_workload', return_value={'job_id': 123}, autospec=True)

        task.populate_workload_identity_tokens()

        # Either no context attribute was created at all, or it is still empty.
        for cred in (ssh_cred, vault_cred):
            assert not hasattr(cred, '_context') or cred.context == {}
|
||||
|
||||
@@ -173,3 +173,54 @@ class TestMigrationSmoke:
|
||||
assert Role.objects.filter(
|
||||
singleton_name='system_administrator', role_field='system_administrator'
|
||||
).exists(), "expected to find a system_administrator singleton role"
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestGithubAppBug:
    """
    Regression test for renaming the `github_app` CredentialType namespace
    to `github_app_lookup` during the 0204_squashed_deletions migration.

    NOTE(review): the stale `github_app` namespace was reported to make
    `awx-manage createsuperuser` fail with a KeyError. This test does not
    run that command; it only checks the migrated CredentialType rows.
    """

    def test_after_github_app_kind_migration(self, migrator):
        """
        Create a CredentialType with namespace `github_app` before
        0204_squashed_deletions, apply that migration, and verify the row
        now has namespace `github_app_lookup` with its name unchanged.

        Args:
            migrator: fixture used to move the database between migration states
        """
        # 1. Migrate to the state just *before* 0204_squashed_deletions.
        #    0203_remove_team_of_teams is used as the direct predecessor.
        old_state = migrator.apply_tested_migration(('main', '0203_remove_team_of_teams'))

        # Use the historical model that matches the pre-migration schema.
        OldCredentialType = old_state.apps.get_model('main', 'CredentialType')

        # 2. Insert a row carrying the old namespace. Note that `kind` is
        #    'external' here; the value being renamed is `namespace`.
        OldCredentialType.objects.create(
            name='Legacy GitHub App Credential',
            kind='external',
            namespace='github_app',  # The namespace that causes the KeyError in the registry lookup
            managed=True,
            created=now(),
            modified=now(),
        )

        # 3. Apply the migration that contains the rename.
        new_state = migrator.apply_tested_migration(('main', '0204_squashed_deletions'))

        # Query through the model that matches the post-migration schema.
        NewCredentialType = new_state.apps.get_model('main', 'CredentialType')

        # Assertion 1: no row is left with the old namespace.
        assert not NewCredentialType.objects.filter(
            namespace='github_app'
        ).exists(), "CredentialType with old 'github_app' namespace should no longer exist after migration."

        # Assertion 2: the row now carries the new namespace and kept its original name.
        assert NewCredentialType.objects.filter(
            namespace='github_app_lookup', name='Legacy GitHub App Credential'
        ).exists(), "CredentialType should be updated to namespace 'github_app_lookup' and retain its name."
|
||||
|
||||
@@ -18,13 +18,14 @@ from awx.main.tests.functional.conftest import * # noqa
|
||||
from awx.main.tests.conftest import load_all_credentials # noqa: F401; pylint: disable=unused-import
|
||||
from awx.main.tests import data
|
||||
|
||||
from awx.main.models import Project, JobTemplate, Organization, Inventory
|
||||
from awx.main.models import Project, JobTemplate, Organization, Inventory, WorkflowJob, UnifiedJob
|
||||
from awx.main.tasks.system import clear_setting_cache
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
PROJ_DATA = os.path.join(os.path.dirname(data.__file__), 'projects')
|
||||
COLL_DATA = os.path.join(os.path.dirname(data.__file__), 'collections')
|
||||
|
||||
|
||||
def _copy_folders(source_path, dest_path, clear=False):
|
||||
@@ -56,6 +57,7 @@ def live_tmp_folder():
|
||||
shutil.rmtree(path)
|
||||
os.mkdir(path)
|
||||
_copy_folders(PROJ_DATA, path)
|
||||
_copy_folders(COLL_DATA, path)
|
||||
for dirname in os.listdir(path):
|
||||
source_dir = os.path.join(path, dirname)
|
||||
subprocess.run(GIT_COMMANDS, cwd=source_dir, shell=True)
|
||||
@@ -69,7 +71,7 @@ def live_tmp_folder():
|
||||
settings._awx_conf_memoizedcache.clear()
|
||||
# cache is cleared in test environment, but need to clear in test environment
|
||||
clear_setting_cache.delay(['AWX_ISOLATION_SHOW_PATHS'])
|
||||
time.sleep(0.2) # allow task to finish, we have no real metric to know
|
||||
time.sleep(5.0) # for _awx_conf_memoizedcache to expire on all workers
|
||||
else:
|
||||
logger.info(f'Believed that {path} is already in settings.AWX_ISOLATION_SHOW_PATHS: {settings.AWX_ISOLATION_SHOW_PATHS}')
|
||||
return path
|
||||
@@ -100,6 +102,21 @@ def wait_for_events(uj, timeout=2):
|
||||
|
||||
|
||||
def unified_job_stdout(uj):
    """
    Return the stdout text of a unified job as one newline-joined string.

    A bare ``UnifiedJob`` is first swapped for its real instance. For a
    workflow job, the output of every node that has a job is gathered
    recursively (nodes ordered by id), each section under a header line.
    For any other job, ``wait_for_events`` is called and then the stdout
    of its events is joined in order of creation.
    """
    # Exact type check: only a plain UnifiedJob is swapped for its real instance.
    if type(uj) is UnifiedJob:
        uj = uj.get_real_instance()

    if not isinstance(uj, WorkflowJob):
        wait_for_events(uj)
        events = uj.get_event_queryset().order_by('created')
        return '\n'.join(event.stdout for event in events)

    nodes = uj.workflow_job_nodes.all().select_related('job').order_by('id')
    sections = []
    for node in nodes:
        # Nodes that never produced a job contribute nothing.
        if node.job is not None:
            section = 'workflow node {node_id} job {job_id} output:\n{output}'.format(
                node_id=node.id,
                job_id=node.job.id,
                output=unified_job_stdout(node.job),
            )
            sections.append(section)
    return '\n'.join(sections)
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user