Fix recover-control-plane to work with etcd 3.3.x and add CI (#5500)

* Fix recover-control-plane to work with etcd 3.3.x and add CI

* Set default values for testcase

* Add actual test jobs

* Attempt to satisfy gitlab ci linter

* Fix ansible targets

* Set etcd_member_name as stated in the docs...

* Recovering from 0 masters is not supported yet

* Add other master to broken_kube-master group as well

* Increase number of retries to see if etcd needs more time to heal

* Make the number of retries for etcd loops configurable, increase it for the recovery CI jobs, and document it (a sketch of the resulting pattern follows the commit metadata below)
qvicksilver
2020-02-11 10:38:01 +01:00
committed by GitHub
parent 68c8c05775
commit ac2135e450
23 changed files with 204 additions and 134 deletions
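
The last bullet is the change the rest of the diff builds on: the etcd health and membership loops now take their retry count from a variable instead of a hard-coded number, and the recovery CI run raises it. A minimal sketch of that pattern, assuming an Ansible task in the etcd role; only the variable name etcd_retries is taken from this diff, while the task name, command, default value and delay are illustrative:

# Illustrative default (e.g. in the etcd role defaults)
etcd_retries: 4

# Illustrative task: retry the health check up to etcd_retries times
# instead of a fixed count, so recovery jobs can raise the limit.
# (TLS/env flags for etcdctl are omitted here for brevity.)
- name: Check etcd cluster health
  command: "{{ bin_dir }}/etcdctl endpoint health"
  register: etcd_health
  until: etcd_health.rc == 0
  retries: "{{ etcd_retries }}"
  delay: 10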

View File

@@ -5,7 +5,7 @@
- name: Set VM count needed for CI test_id
set_fact:
- vm_count: "{%- if mode in ['separate', 'separate-scale', 'ha', 'ha-scale'] -%}{{ 3|int }}{%- elif mode == 'aio' -%}{{ 1|int }}{%- else -%}{{ 2|int }}{%- endif -%}"
+ vm_count: "{%- if mode in ['separate', 'separate-scale', 'ha', 'ha-scale', 'ha-recover', 'ha-recover-noquorum'] -%}{{ 3|int }}{%- elif mode == 'aio' -%}{{ 1|int }}{%- else -%}{{ 2|int }}{%- endif -%}"
- import_tasks: create-vms.yml
when:

View File

@@ -45,6 +45,45 @@ instance-1
[vault]
instance-1
{% elif mode == "ha-recover" %}
[kube-master]
instance-1
instance-2
[kube-node]
instance-3
[etcd]
instance-3
instance-1
instance-2
[broken_kube-master]
instance-2
[broken_etcd]
instance-2 etcd_member_name=etcd3
{% elif mode == "ha-recover-noquorum" %}
[kube-master]
instance-3
instance-1
instance-2
[kube-node]
instance-3
[etcd]
instance-3
instance-1
instance-2
[broken_kube-master]
instance-1
instance-2
[broken_etcd]
instance-1 etcd_member_name=etcd2
instance-2 etcd_member_name=etcd3
{% endif %}
[k8s-cluster:children]
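
Both recovery inventories keep three etcd members and mark the hosts to be treated as failed in the broken_etcd and broken_kube-master groups, with etcd_member_name set on the broken etcd hosts as the recovery docs require. Outside CI, a run against an inventory shaped like this would look roughly like the invocation the test script further down uses; the inventory path here is illustrative:

# Illustrative manual invocation; the CI script below first wipes the broken
# hosts with reset.yml and then lets the recovery playbook rebuild them.
ansible-playbook -i inventory/mycluster/hosts.ini --limit etcd,kube-master \
    -e etcd_retries=10 recover-control-plane.yml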

View File

@@ -0,0 +1,10 @@
---
# Instance settings
cloud_image: ubuntu-1804
mode: ha-recover-noquorum
vm_memory: 1600Mi
# Kubespray settings
kube_network_plugin: calico
deploy_netchecker: true
dns_min_replicas: 1

View File

@@ -0,0 +1,10 @@
---
# Instance settings
cloud_image: ubuntu-1804
mode: ha-recover
vm_memory: 1600Mi
# Kubespray settings
kube_network_plugin: calico
deploy_netchecker: true
dns_min_replicas: 1

View File

@@ -47,6 +47,12 @@ if [ "${UPGRADE_TEST}" != "false" ]; then
ansible-playbook ${LOG_LEVEL} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" $PLAYBOOK
fi
# Test control plane recovery
if [ "${RECOVER_CONTROL_PLANE_TEST}" != "false" ]; then
ansible-playbook ${LOG_LEVEL} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e ansible_python_interpreter=${PYPATH} --limit "${RECOVER_CONTROL_PLANE_TEST_GROUPS}:!fake_hosts" -e reset_confirmation=yes reset.yml
ansible-playbook ${LOG_LEVEL} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e ansible_python_interpreter=${PYPATH} -e etcd_retries=10 --limit etcd,kube-master:!fake_hosts recover-control-plane.yml
fi
# Test cases
## Test Master API
ansible-playbook -e ansible_python_interpreter=${PYPATH} --limit "all:!fake_hosts" tests/testcases/010_check-apiserver.yml $LOG_LEVEL
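
The new block only runs when a CI job opts in through RECOVER_CONTROL_PLANE_TEST, and RECOVER_CONTROL_PLANE_TEST_GROUPS selects the hosts that get reset to simulate the failure before recover-control-plane.yml is run. A hypothetical GitLab CI job wiring the two variables together could look like this; the job name, stage, extends target and host-pattern slice are assumptions, only the two variable names come from the script above:

# Hypothetical job definition in the GitLab CI config
packet_ubuntu18-calico-ha-recover:
  stage: deploy-part2
  extends: .packet
  variables:
    RECOVER_CONTROL_PLANE_TEST: "true"
    # Host pattern handed to reset.yml: the hosts wiped here play the part of
    # the lost control plane nodes (slice syntax is illustrative).
    RECOVER_CONTROL_PLANE_TEST_GROUPS: "etcd[2:],kube-master[1:]"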

View File

@@ -25,3 +25,9 @@ kube-master
calico-rr
[calico-rr]
[broken_kube-master]
node2
[broken_etcd]
node2

View File

@@ -29,6 +29,12 @@
[vault]
{{droplets.results[1].droplet.name}}
{{droplets.results[2].droplet.name}}
[broken_kube-master]
{{droplets.results[1].droplet.name}}
[broken_etcd]
{{droplets.results[2].droplet.name}}
{% else %}
[kube-master]
{{droplets.results[0].droplet.name}}

View File

@@ -37,6 +37,13 @@
{{node1}}
{{node2}}
{{node3}}
[broken_kube-master]
{{node2}}
[broken_etcd]
{{node2}}
{{node3}}
{% elif mode == "default" %}
[kube-master]
{{node1}}