From c938dfa634658749e2a1172295d053222c77bf9a Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Tue, 7 Jan 2025 14:25:48 +0100 Subject: [PATCH 01/30] scripts: get_node_ids.sh Add the script used to obtain GraphQL node IDs from GitHub, so it's easier to add a new component. --- scripts/get_node_ids.sh | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100755 scripts/get_node_ids.sh diff --git a/scripts/get_node_ids.sh b/scripts/get_node_ids.sh new file mode 100755 index 000000000..1343d0069 --- /dev/null +++ b/scripts/get_node_ids.sh @@ -0,0 +1,36 @@ +#!/bin/sh +gh api graphql -H "X-Github-Next-Global-ID: 1" -f query='{ + calicoctl_binary: repository(owner: "projectcalico", name: "calico") { + id + } + ciliumcli_binary: repository(owner: "cilium", name: "cilium-cli") { + id + } + crictl: repository(owner: "kubernetes-sigs", name: "cri-tools") { + id + } + crio_archive: repository(owner: "cri-o", name: "cri-o") { + id + } + etcd_binary: repository(owner: "etcd-io", name: "etcd") { + id + } + kubectl: repository(owner: "kubernetes", name: "kubernetes") { + id + } + nerdctl_archive: repository(owner: "containerd", name: "nerdctl") { + id + } + runc: repository(owner: "opencontainers", name: "runc") { + id + } + skopeo_binary: repository(owner: "lework", name: "skopeo-binary") { + id + } + yq: repository(owner: "mikefarah", name: "yq") { + id + } + kubernetes: repository(owner: "kubernetes", name: "kubernetes") { + id + } +}' From 7941be127de4ff5ea0e57966c9b30c6206495965 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Sat, 16 Nov 2024 18:55:17 +0100 Subject: [PATCH 02/30] downloads: add graphql node ids The GitHub GraphQL API needs node IDs for querying a variable array of repositories. Use a dict for components instead of an array of URLs, and record the corresponding node ID for each component (there are duplicates because some binaries are provided by the same project/repository).
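For illustration, each entry in the new dict pairs the checksum URL template with the repository's GraphQL node ID produced by scripts/get_node_ids.sh; a sketch of one entry from this patch (both values are taken from the diff below):

    downloads = {
        "runc": {
            'url': "https://github.com/opencontainers/runc/releases/download/{version}/runc.sha256sum",
            'graphql_id': "R_kgDOAjP4QQ",  # GraphQL node ID of opencontainers/runc
        },
    }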
--- scripts/download_hash.py | 70 ++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index ecd901162..22a7a1b6b 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -31,20 +31,62 @@ def version_compare(version): return Version(version.removeprefix("v")) downloads = { - "calicoctl_binary": "https://github.com/projectcalico/calico/releases/download/{version}/SHA256SUMS", - "ciliumcli_binary": "https://github.com/cilium/cilium-cli/releases/download/{version}/cilium-{os}-{arch}.tar.gz.sha256sum", - "cni_binary": "https://github.com/containernetworking/plugins/releases/download/{version}/cni-plugins-{os}-{arch}-{version}.tgz.sha256", - "containerd_archive": "https://github.com/containerd/containerd/releases/download/v{version}/containerd-{version}-{os}-{arch}.tar.gz.sha256sum", - "crictl": "https://github.com/kubernetes-sigs/cri-tools/releases/download/{version}/crictl-{version}-{os}-{arch}.tar.gz.sha256", - "crio_archive": "https://storage.googleapis.com/cri-o/artifacts/cri-o.{arch}.{version}.tar.gz.sha256sum", - "etcd_binary": "https://github.com/etcd-io/etcd/releases/download/{version}/SHA256SUMS", - "kubeadm": "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubeadm.sha256", - "kubectl": "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubectl.sha256", - "kubelet": "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubelet.sha256", - "nerdctl_archive": "https://github.com/containerd/nerdctl/releases/download/v{version}/SHA256SUMS", - "runc": "https://github.com/opencontainers/runc/releases/download/{version}/runc.sha256sum", - "skopeo_binary": "https://github.com/lework/skopeo-binary/releases/download/{version}/skopeo-{os}-{arch}.sha256", - "yq": "https://github.com/mikefarah/yq/releases/download/{version}/checksums-bsd", # see https://github.com/mikefarah/yq/pull/1691 for why we use this url + "calicoctl_binary": { + 'url': "https://github.com/projectcalico/calico/releases/download/{version}/SHA256SUMS", + 'graphql_id': "R_kgDOA87D0g", + }, + "ciliumcli_binary": { + 'url': "https://github.com/cilium/cilium-cli/releases/download/{version}/cilium-{os}-{arch}.tar.gz.sha256sum", + 'graphql_id': "R_kgDOE0nmLg" + }, + "cni_binary": { + 'url': "https://github.com/containernetworking/plugins/releases/download/{version}/cni-plugins-{os}-{arch}-{version}.tgz.sha256", + 'graphql_id': "R_kgDOBQqEpg", + }, + "containerd_archive": { + 'url': "https://github.com/containerd/containerd/releases/download/v{version}/containerd-{version}-{os}-{arch}.tar.gz.sha256sum", + 'graphql_id': "R_kgDOAr9FWA" + }, + "crictl": { + 'url': "https://github.com/kubernetes-sigs/cri-tools/releases/download/{version}/crictl-{version}-{os}-{arch}.tar.gz.sha256", + 'graphql_id': "R_kgDOBMdURA", + }, + "crio_archive": { + 'url':"https://storage.googleapis.com/cri-o/artifacts/cri-o.{arch}.{version}.tar.gz.sha256sum", + 'graphql_id': "R_kgDOBAr5pg", + }, + "etcd_binary": { + 'url': "https://github.com/etcd-io/etcd/releases/download/{version}/SHA256SUMS", + 'graphql_id': "R_kgDOAKtHtg", + }, + "kubeadm": { + 'url': "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubeadm.sha256", + 'graphql_id': "R_kgDOAToIkg" + }, + "kubectl": { + 'url': "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubectl.sha256", + 'graphql_id': "R_kgDOAToIkg" + }, + "kubelet": { + 'url': "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubelet.sha256", + 'graphql_id': "R_kgDOAToIkg" + }, + "nerdctl_archive": { 
+ 'url': "https://github.com/containerd/nerdctl/releases/download/v{version}/SHA256SUMS", + 'graphql_id': "R_kgDOEvuRnQ", + }, + "runc": { + 'url': "https://github.com/opencontainers/runc/releases/download/{version}/runc.sha256sum", + 'graphql_id': "R_kgDOAjP4QQ", + }, + "skopeo_binary": { + 'url': "https://github.com/lework/skopeo-binary/releases/download/{version}/skopeo-{os}-{arch}.sha256", + 'graphql_id': "R_kgDOHQ6J9w", + }, + "yq": { + 'url':"https://github.com/mikefarah/yq/releases/download/{version}/checksums-bsd", # see https://github.com/mikefarah/yq/pull/1691 for why we use this url + 'graphql_id': "R_kgDOApOQGQ" + }, } # TODO: downloads not supported # youki: no checkusms in releases From a6219c84c932c19b5359a8028dfdf248c8d6ca36 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 16 Dec 2024 16:21:48 +0100 Subject: [PATCH 03/30] Put graphql query in it's own file --- scripts/list_releases.graphql | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 scripts/list_releases.graphql diff --git a/scripts/list_releases.graphql b/scripts/list_releases.graphql new file mode 100644 index 000000000..6a2d225f1 --- /dev/null +++ b/scripts/list_releases.graphql @@ -0,0 +1,29 @@ +query($repoWithReleases: [ID!]!, $repoWithTags: [ID!]!) { + with_releases: nodes(ids: $repoWithReleases) { + + ... on Repository { + nameWithOwner + releases(first: 100) { + nodes { + tagName + isPrerelease + releaseAssets { + totalCount + } + } + } + } + } + + with_tags: nodes(ids: $repoWithTags) { + + ... on Repository { + nameWithOwner + refs(refPrefix: "refs/tags/", last: 100) { + nodes { + name + } + } + } + } +} From 9f58ba60f3804d8ee9591767c5ad82198b220e39 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 16 Dec 2024 16:24:43 +0100 Subject: [PATCH 04/30] download: compute new versions from Github API We obtain the set of version from Github, then for each component we do a set comparison to determine which versions we don't have. --- scripts/download_hash.py | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 22a7a1b6b..aa5c45966 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -5,6 +5,7 @@ # with new hashes. 
import sys +import os from itertools import count, groupby from collections import defaultdict @@ -95,8 +96,8 @@ downloads = { # crun : PGP signatures # cri_dockerd: no checksums or signatures # helm_archive: PGP signatures -# krew_archive: different yaml structure -# calico_crds_archive: different yaml structure +# krew_archive: different yaml structure (in our download) +# calico_crds_archive: different yaml structure (in our download) # TODO: # noarch support -> k8s manifests, helm charts @@ -157,6 +158,36 @@ def download_hash(only_downloads: [str]) -> None: hash_file.raise_for_status() return download_hash_extract[download](hash_file.content.decode()) + nodes_ids = [x['graphql_id'] for x in downloads.values()] + ql_params = { + 'repoWithReleases': nodes_ids, + 'repoWithTags': [], + } + with open("list_releases.graphql") as query: + response = s.post("https://api.github.com/graphql", + json={'query': query.read(), 'variables': ql_params}, + headers={ + "Authorization": f"Bearer {os.environ['API_KEY']}", + } + ) + response.raise_for_status() + github_versions = dict(zip([k + '_checksums' for k in downloads.keys()], + [ + {r["tagName"] for r in repo["releases"]["nodes"] + if not r["isPrerelease"] # and r["releaseAssets"]["totalCount"] > 2 + # instead here we need optional custom predicate per-component to filter out + } + for repo in response.json()["data"]["with_releases"] + ], + strict=True)) + + new_versions = { + component: github_versions[component] - set(list(archs.values())[0].keys()) + for component, archs in data.items() if component in [k + '_checksums' for k in downloads.keys()] + } + + + for download, url in (downloads if only_downloads == [] else {k:downloads[k] for k in downloads.keys() & only_downloads}).items(): checksum_name = f"{download}_checksums" From ae68766015b5836ff23c1fe33ccc80ceb5be895a Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 23 Dec 2024 13:48:58 +0100 Subject: [PATCH 05/30] Filter GitHub results by InvalidVersion Containerd uses the same repository for releases of its gRPC API (which we are not interested in). Conveniently, those releases have tags which are not valid version numbers (being prefixed with 'api/'). This could also be potentially useful for similar cases. The risk of missing releases because of this is low, since it would require that a project issue a new release with an invalid format, then switch back to the previous format (or that we miss the fact it's not updating for a long period of time).
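A minimal sketch of the idea, mirroring the valid_version helper added in the diff below (the tag values are illustrative):

    from packaging.version import Version, InvalidVersion

    def valid_version(possible_version: str):
        try:
            return Version(possible_version)
        except InvalidVersion:
            return None

    # containerd's gRPC API tags are prefixed with 'api/', which is not a valid version
    assert valid_version("api/1.8.0") is None
    # regular release tags parse fine (packaging accepts a leading 'v')
    assert valid_version("v1.7.24") == Version("1.7.24")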
--- scripts/download_hash.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index aa5c45966..4b2a1eee7 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -13,7 +13,9 @@ from functools import cache import argparse import requests from ruamel.yaml import YAML -from packaging.version import Version +from packaging.version import Version, InvalidVersion + +from typing import Optional CHECKSUMS_YML = "../roles/kubespray-defaults/defaults/main/checksums.yml" @@ -171,11 +173,18 @@ def download_hash(only_downloads: [str]) -> None: } ) response.raise_for_status() + def valid_version(possible_version: str) -> Optional[Version]: + try: + return Version(possible_version) + except InvalidVersion: + return None + github_versions = dict(zip([k + '_checksums' for k in downloads.keys()], [ - {r["tagName"] for r in repo["releases"]["nodes"] - if not r["isPrerelease"] # and r["releaseAssets"]["totalCount"] > 2 - # instead here we need optional custom predicate per-component to filter out + { + v for r in repo["releases"]["nodes"] + if not r["isPrerelease"] + and (v := valid_version(r["tagName"])) is not None } for repo in response.json()["data"]["with_releases"] ], From 2be54b2bd73f42a26615019bc3a827e0ba440770 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 23 Dec 2024 13:50:45 +0100 Subject: [PATCH 06/30] Filter new versions: only newer patches of existing minor releases We're only interested in new patch releases for auto-update. --- scripts/download_hash.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 4b2a1eee7..0eeb06d75 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -191,9 +191,20 @@ def download_hash(only_downloads: [str]) -> None: strict=True)) new_versions = { - component: github_versions[component] - set(list(archs.values())[0].keys()) - for component, archs in data.items() if component in [k + '_checksums' for k in downloads.keys()] + component: + {v for v in github_versions[component] + if any(v > version and (v.major, v.minor) == (version.major, version.minor) + for version in [max(minors) for _, minors in groupby(cur_v, lambda v: (v.minor, v.major))]) + # only get: + # - patch versions (no minor or major bump) + # - newer ones (don't get old patch version) } + - set(cur_v) + for component, archs in data.items() + if component in [k + '_checksums' for k in downloads.keys()] + # this is only to bound cur_v in the scope + and (cur_v := sorted(Version(k) for k in next(archs.values().__iter__()).keys())) } From 24c59cee594adaffd63484515b0460c7ea656942 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Fri, 20 Dec 2024 11:15:40 +0100 Subject: [PATCH 07/30] download_hash: adapt download urls to v-less versions --- scripts/download_hash.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 0eeb06d75..bb60803c1 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -35,15 +35,15 @@ downloads = { "calicoctl_binary": { - 'url': "https://github.com/projectcalico/calico/releases/download/{version}/SHA256SUMS", + 'url': "https://github.com/projectcalico/calico/releases/download/v{version}/SHA256SUMS", 'graphql_id': "R_kgDOA87D0g", }, "ciliumcli_binary": { - 'url':
"https://github.com/cilium/cilium-cli/releases/download/{version}/cilium-{os}-{arch}.tar.gz.sha256sum", + 'url': "https://github.com/cilium/cilium-cli/releases/download/v{version}/cilium-{os}-{arch}.tar.gz.sha256sum", 'graphql_id': "R_kgDOE0nmLg" }, "cni_binary": { - 'url': "https://github.com/containernetworking/plugins/releases/download/{version}/cni-plugins-{os}-{arch}-{version}.tgz.sha256", + 'url': "https://github.com/containernetworking/plugins/releases/download/v{version}/cni-plugins-{os}-{arch}-v{version}.tgz.sha256", 'graphql_id': "R_kgDOBQqEpg", }, "containerd_archive": { @@ -51,27 +51,27 @@ downloads = { 'graphql_id': "R_kgDOAr9FWA" }, "crictl": { - 'url': "https://github.com/kubernetes-sigs/cri-tools/releases/download/{version}/crictl-{version}-{os}-{arch}.tar.gz.sha256", + 'url': "https://github.com/kubernetes-sigs/cri-tools/releases/download/v{version}/crictl-v{version}-{os}-{arch}.tar.gz.sha256", 'graphql_id': "R_kgDOBMdURA", }, "crio_archive": { - 'url':"https://storage.googleapis.com/cri-o/artifacts/cri-o.{arch}.{version}.tar.gz.sha256sum", + 'url':"https://storage.googleapis.com/cri-o/artifacts/cri-o.{arch}.v{version}.tar.gz.sha256sum", 'graphql_id': "R_kgDOBAr5pg", }, "etcd_binary": { - 'url': "https://github.com/etcd-io/etcd/releases/download/{version}/SHA256SUMS", + 'url': "https://github.com/etcd-io/etcd/releases/download/v{version}/SHA256SUMS", 'graphql_id': "R_kgDOAKtHtg", }, "kubeadm": { - 'url': "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubeadm.sha256", + 'url': "https://dl.k8s.io/release/v{version}/bin/linux/{arch}/kubeadm.sha256", 'graphql_id': "R_kgDOAToIkg" }, "kubectl": { - 'url': "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubectl.sha256", + 'url': "https://dl.k8s.io/release/v{version}/bin/linux/{arch}/kubectl.sha256", 'graphql_id': "R_kgDOAToIkg" }, "kubelet": { - 'url': "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubelet.sha256", + 'url': "https://dl.k8s.io/release/v{version}/bin/linux/{arch}/kubelet.sha256", 'graphql_id': "R_kgDOAToIkg" }, "nerdctl_archive": { @@ -79,15 +79,15 @@ downloads = { 'graphql_id': "R_kgDOEvuRnQ", }, "runc": { - 'url': "https://github.com/opencontainers/runc/releases/download/{version}/runc.sha256sum", + 'url': "https://github.com/opencontainers/runc/releases/download/v{version}/runc.sha256sum", 'graphql_id': "R_kgDOAjP4QQ", }, "skopeo_binary": { - 'url': "https://github.com/lework/skopeo-binary/releases/download/{version}/skopeo-{os}-{arch}.sha256", + 'url': "https://github.com/lework/skopeo-binary/releases/download/v{version}/skopeo-{os}-{arch}.sha256", 'graphql_id': "R_kgDOHQ6J9w", }, "yq": { - 'url':"https://github.com/mikefarah/yq/releases/download/{version}/checksums-bsd", # see https://github.com/mikefarah/yq/pull/1691 for why we use this url + 'url':"https://github.com/mikefarah/yq/releases/download/v{version}/checksums-bsd", # see https://github.com/mikefarah/yq/pull/1691 for why we use this url 'graphql_id': "R_kgDOApOQGQ" }, } From 38dd224ffe20575a917e3af5aa74245dff3fa931 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Fri, 20 Dec 2024 11:18:18 +0100 Subject: [PATCH 08/30] Extract get_hash into it's own function Also, always raise even for 404 not found (should not happen now that we'll use GraphQL to find the exact set of versions) --- scripts/download_hash.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index bb60803c1..536a6ca38 100644 --- a/scripts/download_hash.py +++ 
b/scripts/download_hash.py @@ -149,14 +149,11 @@ def download_hash(only_downloads: [str]) -> None: @cache def _get_hash_by_arch(download: str, version: str) -> {str: str}: - hash_file = s.get(downloads[download].format( + hash_file = s.get(downloads[download]['url'].format( version = version, os = "linux", ), allow_redirects=True) - if hash_file.status_code == 404: - print(f"Unable to find {download} hash file for version {version} at {hash_file.url}") - return None hash_file.raise_for_status() return download_hash_extract[download](hash_file.content.decode()) @@ -206,6 +203,20 @@ def download_hash(only_downloads: [str]) -> None: and (cur_v := sorted(Version(k) for k in next(archs.values().__iter__()).keys())) } + def get_hash(component: str, version: Version, arch: str): + if component in download_hash_extract: + hashes = _get_hash_by_arch(component, version) + return hashes[arch] + else: + hash_file = s.get( + downloads[component]['url'].format( + version = version, + os = "linux", + arch = arch + ), + allow_redirects=True) + hash_file.raise_for_status() + return (hash_file.content.decode().split()[0]) From 08913c4aa0c510bcfc35a32e96ffa64af923a974 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Fri, 20 Dec 2024 11:21:54 +0100 Subject: [PATCH 09/30] Don't use 'checksum' in the component names --- scripts/download_hash.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 536a6ca38..1cedb10e1 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -176,20 +176,20 @@ def download_hash(only_downloads: [str]) -> None: except InvalidVersion: return None - github_versions = dict(zip([k + '_checksums' for k in downloads.keys()], - [ - { - v for r in repo["releases"]["nodes"] - if not r["isPrerelease"] - and (v := valid_version(r["tagName"])) is not None - } - for repo in response.json()["data"]["with_releases"] - ], - strict=True)) + github_versions = dict(zip(downloads.keys(), + [ + { + v for r in repo["releases"]["nodes"] + if not r["isPrerelease"] + and (v := valid_version(r["tagName"])) is not None + } + for repo in response.json()["data"]["with_releases"] + ], + strict=True)) new_versions = { - component: - {v for v in github_versions[component] + c: + {v for v in github_versions[c] if any(v > version and (v.major, v.minor) == (version.major, version.minor) for version in [max(minors) for _, minors in groupby(cur_v, lambda v: (v.minor, v.major))]) # only get: # - patch versions (no minor or major bump) # - newer ones (don't get old patch version) } - set(cur_v) for component, archs in data.items() - if component in [k + '_checksums' for k in downloads.keys()] + if (c := component.removesuffix('_checksums')) in downloads.keys() # this is only to bound cur_v in the scope and (cur_v := sorted(Version(k) for k in next(archs.values().__iter__()).keys())) } From 5be8155394340daa3fe9b810b1ef2a9c5053a4fe Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 23 Dec 2024 13:55:29 +0100 Subject: [PATCH 10/30] remove old loops and generators --- scripts/download_hash.py | 57 +--------------------------------------- 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 1cedb10e1..281885054 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -7,7 +7,7 @@ import sys import os -from itertools import count, groupby +from itertools import groupby from collections import
defaultdict from functools import cache import argparse @@ -30,9 +30,6 @@ def open_checksums_yaml(): return data, yaml -def version_compare(version): - return Version(version.removeprefix("v")) - downloads = { "calicoctl_binary": { 'url': "https://github.com/projectcalico/calico/releases/download/v{version}/SHA256SUMS", @@ -219,58 +216,6 @@ def download_hash(only_downloads: [str]) -> None: return (hash_file.content.decode().split()[0]) - for download, url in (downloads if only_downloads == [] - else {k:downloads[k] for k in downloads.keys() & only_downloads}).items(): - checksum_name = f"{download}_checksums" - # Propagate new patch versions to all architectures - for arch in data[checksum_name].values(): - for arch2 in data[checksum_name].values(): - arch.update({ - v:("NONE" if arch2[v] == "NONE" else 0) - for v in (set(arch2.keys()) - set(arch.keys())) - if v.split('.')[2] == '0'}) - # this is necessary to make the script indempotent, - # by only adding a vX.X.0 version (=minor release) in each arch - # and letting the rest of the script populate the potential - # patch versions - - for arch, versions in data[checksum_name].items(): - for minor, patches in groupby(versions.copy().keys(), lambda v : '.'.join(v.split('.')[:-1])): - for version in (f"{minor}.{patch}" for patch in - count(start=int(max(patches, key=version_compare).split('.')[-1]), - step=1)): - # Those barbaric generators do the following: - # Group all patches versions by minor number, take the newest and start from that - # to find new versions - if version in versions and versions[version] != 0: - continue - if download in download_hash_extract: - hashes = _get_hash_by_arch(download, version) - if hashes == None: - break - sha256sum = hashes.get(arch) - if sha256sum == None: - break - else: - hash_file = s.get(downloads[download].format( - version = version, - os = "linux", - arch = arch - ), - allow_redirects=True) - if hash_file.status_code == 404: - print(f"Unable to find {download} hash file for version {version} (arch: {arch}) at {hash_file.url}") - break - hash_file.raise_for_status() - sha256sum = hash_file.content.decode().split()[0] - - if len(sha256sum) != 64: - raise Exception(f"Checksum has an unexpected length: {len(sha256sum)} (binary: {download}, arch: {arch}, release: {version}, checksum: '{sha256sum}')") - data[checksum_name][arch][version] = sha256sum - data[checksum_name] = {arch : {r : releases[r] for r in sorted(releases.keys(), - key=version_compare, - reverse=True)} - for arch, releases in data[checksum_name].items()} with open(CHECKSUMS_YML, "w") as checksums_yml: yaml.dump(data, checksums_yml) From c94daa4ff515a66e62684a26a92f9cd3b2cfd284 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 23 Dec 2024 13:53:03 +0100 Subject: [PATCH 11/30] download: Update yaml data with new hashes --- scripts/download_hash.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 281885054..c0c745b01 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -184,6 +184,10 @@ def download_hash(only_downloads: [str]) -> None: ], strict=True)) + components_supported_arch = { + component.removesuffix('_checksums'): [a for a in archs.keys()] + for component, archs in data.items() + } new_versions = { c: {v for v in github_versions[c] @@ -216,6 +220,21 @@ def download_hash(only_downloads: [str]) -> None: return (hash_file.content.decode().split()[0]) + for component, versions in new_versions.items(): + c = component + 
'_checksums' + for arch in components_supported_arch[component]: + for version in versions: + data[c][arch][str(version)] = f"{downloads[component].get('hashtype', 'sha256')}:{get_hash(component, version, arch)}" + + data[c] = {arch : + {v : + versions[v] for v in sorted(versions.keys(), + key=Version, + reverse=True) + } + for arch, versions in data[c].items() + } + with open(CHECKSUMS_YML, "w") as checksums_yml: yaml.dump(data, checksums_yml) From 9334bc1feebd0f5b7ac314398f2f07e0be72bc34 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Fri, 20 Dec 2024 14:56:30 +0100 Subject: [PATCH 12/30] support components with no premade hashes --- scripts/download_hash.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index c0c745b01..be55a714c 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -12,6 +12,7 @@ from collections import defaultdict from functools import cache import argparse import requests +import hashlib from ruamel.yaml import YAML from packaging.version import Version, InvalidVersion @@ -217,6 +218,8 @@ def download_hash(only_downloads: [str]) -> None: ), allow_redirects=True) hash_file.raise_for_status() + if downloads[component].get('binary', False): + return hashlib.sha256(hash_file.content).hexdigest() return (hash_file.content.decode().split()[0]) From 3a44411aa1aa67df4a983702d7364cd854d66a93 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Fri, 20 Dec 2024 15:35:36 +0100 Subject: [PATCH 13/30] Support projects using alternate names for arch (the url should use `alt_arch` instead of `arch` for those) --- scripts/download_hash.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index be55a714c..5a7508966 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -89,6 +89,14 @@ downloads = { 'graphql_id': "R_kgDOApOQGQ" }, } + +arch_alt_name = { + "amd64": "x86_64", + "arm64": "aarch64", + "ppc64le": None, + "arm": None, +} + # TODO: downloads not supported # youki: no checkusms in releases # kata: no checksums in releases @@ -214,7 +222,8 @@ def download_hash(only_downloads: [str]) -> None: downloads[component]['url'].format( version = version, os = "linux", - arch = arch + arch = arch, + alt_arch = arch_alt_name[arch], ), allow_redirects=True) hash_file.raise_for_status() From 479fda635588ab155de973c39227252792cf5b34 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Fri, 20 Dec 2024 15:11:10 +0100 Subject: [PATCH 14/30] download: support cri-dockerd, youki, kata, crun --- scripts/download_hash.py | 26 +++++++++++++++++++++----- scripts/get_node_ids.sh | 12 ++++++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 5a7508966..937916097 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -48,6 +48,11 @@ downloads = { 'url': "https://github.com/containerd/containerd/releases/download/v{version}/containerd-{version}-{os}-{arch}.tar.gz.sha256sum", 'graphql_id': "R_kgDOAr9FWA" }, + "cri_dockerd_archive": { + 'binary': True, + 'url': "https://github.com/Mirantis/cri-dockerd/releases/download/v{version}/cri-dockerd-{version}.{arch}.tgz", + 'graphql_id': "R_kgDOEvvLcQ", + }, "crictl": { 'url': "https://github.com/kubernetes-sigs/cri-tools/releases/download/v{version}/crictl-v{version}-{os}-{arch}.tar.gz.sha256", 'graphql_id': "R_kgDOBMdURA", }, "crio_archive": {
'url':"https://storage.googleapis.com/cri-o/artifacts/cri-o.{arch}.v{version}.tar.gz.sha256sum", 'graphql_id': "R_kgDOBAr5pg", }, + "crun": { + 'url': "https://github.com/containers/crun/releases/download/{version}/crun-{version}-linux-{arch}", + 'binary': True, + 'graphql_id': "R_kgDOBip3vA", + }, "etcd_binary": { 'url': "https://github.com/etcd-io/etcd/releases/download/v{version}/SHA256SUMS", 'graphql_id': "R_kgDOAKtHtg", }, + "kata_containers_binary": { + 'url': "https://github.com/kata-containers/kata-containers/releases/download/{version}/kata-static-{version}-{arch}.tar.xz", + 'binary': True, + 'graphql_id': "R_kgDOBsJsHQ", + }, "kubeadm": { 'url': "https://dl.k8s.io/release/v{version}/bin/linux/{arch}/kubeadm.sha256", 'graphql_id': "R_kgDOAToIkg" @@ -84,8 +99,13 @@ downloads = { 'url': "https://github.com/lework/skopeo-binary/releases/download/v{version}/skopeo-{os}-{arch}.sha256", 'graphql_id': "R_kgDOHQ6J9w", }, + "youki": { + 'url': "https://github.com/youki-dev/youki/releases/download/v{version}/youki-{version}-{alt_arch}-gnu.tar.gz", + 'binary': True, + 'graphql_id': "R_kgDOFPvgPg", + }, "yq": { - 'url':"https://github.com/mikefarah/yq/releases/download/v{version}/checksums-bsd", # see https://github.com/mikefarah/yq/pull/1691 for why we use this url + 'url': "https://github.com/mikefarah/yq/releases/download/v{version}/checksums-bsd", # see https://github.com/mikefarah/yq/pull/1691 for why we use this url 'graphql_id': "R_kgDOApOQGQ" }, } @@ -98,11 +118,7 @@ arch_alt_name = { } # TODO: downloads not supported -# youki: no checkusms in releases -# kata: no checksums in releases # gvisor: sha512 checksums -# crun : PGP signatures -# cri_dockerd: no checksums or signatures # helm_archive: PGP signatures # krew_archive: different yaml structure (in our download) # calico_crds_archive: different yaml structure (in our download) diff --git a/scripts/get_node_ids.sh b/scripts/get_node_ids.sh index 1343d0069..8f2f3f9ba 100755 --- a/scripts/get_node_ids.sh +++ b/scripts/get_node_ids.sh @@ -30,7 +30,19 @@ gh api graphql -H "X-Github-Next-Global-ID: 1" -f query='{ yq: repository(owner: "mikefarah", name: "yq") { id } + youki: repository(owner: "youki-dev", name: "youki") { + id + } kubernetes: repository(owner: "kubernetes", name: "kubernetes") { id } + cri_dockerd: repository(owner: "Mirantis", name: "cri-dockerd") { + id + } + kata: repository(owner: "kata-containers", name: "kata-containers") { + id + } + crun: repository(owner: "containers", name: "crun") { + id + } }' From 6608efb2c44ef116266475c33b25add874a15ac6 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Sat, 21 Dec 2024 16:43:56 +0100 Subject: [PATCH 15/30] download: compute version from Github tags for gvisor Gvisor is the only one of our deployed components which use tags instead of proper releases. So the tags scraping support will, for now, cater to gvisor particularities, notably in the tag name format and the fact that some older releases don't have the same URL scheme. 
--- scripts/download_hash.py | 37 ++++++++++++++++++++++++++++++------- scripts/get_node_ids.sh | 3 +++ 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 937916097..b1f7656b1 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -7,7 +7,8 @@ import sys import os -from itertools import groupby +from itertools import groupby, chain +from more_itertools import partition from collections import defaultdict from functools import cache import argparse @@ -70,6 +71,18 @@ downloads = { 'url': "https://github.com/etcd-io/etcd/releases/download/v{version}/SHA256SUMS", 'graphql_id': "R_kgDOAKtHtg", }, + "gvisor_containerd_shim_binary": { + 'url': "https://storage.googleapis.com/gvisor/releases/release/{version}/{alt_arch}/containerd-shim-runsc-v1.sha512", + 'hashtype': "sha512", + 'tags': True, + 'graphql_id': "R_kgDOB9IlXg", + }, + "gvisor_runsc_binary": { + 'url': "https://storage.googleapis.com/gvisor/releases/release/{version}/{alt_arch}/runsc.sha512", + 'hashtype': "sha512", + 'tags': True, + 'graphql_id': "R_kgDOB9IlXg", + }, "kata_containers_binary": { 'url': "https://github.com/kata-containers/kata-containers/releases/download/{version}/kata-static-{version}-{arch}.tar.xz", 'binary': True, @@ -179,10 +192,14 @@ def download_hash(only_downloads: [str]) -> None: hash_file.raise_for_status() return download_hash_extract[download](hash_file.content.decode()) - nodes_ids = [x['graphql_id'] for x in downloads.values()] + + releases, tags = map(dict, + partition(lambda r: r[1].get('tags', False), + {k: downloads[k] for k in (downloads.keys() & only_downloads)}.items() + )) ql_params = { - 'repoWithReleases': nodes_ids, - 'repoWithTags': [], + 'repoWithReleases': [r['graphql_id'] for r in releases.values()], + 'repoWithTags': [t['graphql_id'] for t in tags.values()], } with open("list_releases.graphql") as query: response = s.post("https://api.github.com/graphql", @@ -197,15 +214,21 @@ def download_hash(only_downloads: [str]) -> None: return Version(possible_version) except InvalidVersion: return None - - github_versions = dict(zip(downloads.keys(), + rep = response.json()["data"] + github_versions = dict(zip(chain(releases.keys(), tags.keys()), [ { v for r in repo["releases"]["nodes"] if not r["isPrerelease"] and (v := valid_version(r["tagName"])) is not None } - for repo in response.json()["data"]["with_releases"] + for repo in rep["with_releases"] + ] + + [ + { v for t in repo["refs"]["nodes"] + if (v := valid_version(t["name"].removeprefix('release-'))) is not None + } + for repo in rep["with_tags"] ], strict=True)) diff --git a/scripts/get_node_ids.sh b/scripts/get_node_ids.sh index 8f2f3f9ba..304d44b8a 100755 --- a/scripts/get_node_ids.sh +++ b/scripts/get_node_ids.sh @@ -45,4 +45,7 @@ gh api graphql -H "X-Github-Next-Global-ID: 1" -f query='{ crun: repository(owner: "containers", name: "crun") { id } + gvisor: repository(owner: "google", name: "gvisor") { + id + } }' From ff3d9a04437902c833ec387b38e8476932a17f06 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Sat, 21 Dec 2024 16:44:41 +0100 Subject: [PATCH 16/30] download: Support for gvisor (part 2) Gvisor releases, besides only being tags, have some particularities: - they are of the form yyyymmdd.p -> this gets interpreted as a YAML float, so we need to explicitly convert to string to make it work.
- there is no semver-like scheme attached to the version numbers, but the API (= OCI container runtime interface) is expected to be stable (see linked discussion) - some older tags don't have hashes for some archs Link: https://groups.google.com/g/gvisor-users/c/SxMeHt0Yb6Y/m/Xtv7seULCAAJ --- scripts/download_hash.py | 19 ++++++++++++------- scripts/list_releases.graphql | 2 +- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index b1f7656b1..7abc1838b 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -239,17 +239,22 @@ def download_hash(only_downloads: [str]) -> None: new_versions = { c: {v for v in github_versions[c] - if any(v > version and (v.major, v.minor) == (version.major, version.minor) - for version in [max(minors) for _, minors in groupby(cur_v, lambda v: (v.minor, v.major))]) - # only get: - # - patch versions (no minor or major bump) - # - newer ones (don't get old patch version) + if any(v > version + and ( + (v.major, v.minor) == (version.major, version.minor) + or c.startswith('gvisor') + ) + for version in [max(minors) for _, minors in groupby(cur_v, lambda v: (v.minor, v.major))] + ) + # only get: + # - patch versions (no minor or major bump) (exception for gvisor which does not have a major.minor.patch scheme) + # - newer ones (don't get old patch version) } - set(cur_v) for component, archs in data.items() if (c := component.removesuffix('_checksums')) in downloads.keys() # this is only to bound cur_v in the scope - and (cur_v := sorted(Version(k) for k in next(archs.values().__iter__()).keys())) + and (cur_v := sorted(Version(str(k)) for k in next(archs.values().__iter__()).keys())) } def get_hash(component: str, version: Version, arch: str): @@ -280,7 +285,7 @@ def download_hash(only_downloads: [str]) -> None: data[c] = {arch : {v : versions[v] for v in sorted(versions.keys(), - key=Version, + key=lambda v: Version(str(v)), reverse=True) } for arch, versions in data[c].items() diff --git a/scripts/list_releases.graphql b/scripts/list_releases.graphql index 6a2d225f1..fb060db26 100644 --- a/scripts/list_releases.graphql +++ b/scripts/list_releases.graphql @@ -19,7 +19,7 @@ query($repoWithReleases: [ID!]!, $repoWithTags: [ID!]!) { ...
on Repository { nameWithOwner - refs(refPrefix: "refs/tags/", last: 100) { + refs(refPrefix: "refs/tags/", last: 25) { nodes { name } From ff768cc9febfd49f0565b8d2f9dccfa1362daa18 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Sat, 21 Dec 2024 21:42:06 +0100 Subject: [PATCH 17/30] download: support multiple hash algorithms --- scripts/download_hash.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 7abc1838b..4c7102bcb 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -272,7 +272,10 @@ def download_hash(only_downloads: [str]) -> None: allow_redirects=True) hash_file.raise_for_status() if downloads[component].get('binary', False): - return hashlib.sha256(hash_file.content).hexdigest() + return hashlib.new( + downloads[component].get('hashtype', 'sha256'), + hash_file.content + ).hexdigest() return (hash_file.content.decode().split()[0]) From 9fbc566d98398b5525adcb2c704c463e9216cc48 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Sun, 22 Dec 2024 15:14:56 +0100 Subject: [PATCH 18/30] download: Support adding new versions and update the doc --- scripts/download_hash.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 4c7102bcb..0f6cba92e 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -257,6 +257,15 @@ def download_hash(only_downloads: [str]) -> None: and (cur_v := sorted(Version(str(k)) for k in next(archs.values().__iter__()).keys())) } + hash_set_to_0 = { + c: { + Version(str(v)) for v, h in chain.from_iterable(a.items() for a in archs.values()) + if h == 0 + } + for component, archs in data.items() + if (c := component.removesuffix('_checksums')) in downloads.keys() + } + def get_hash(component: str, version: Version, arch: str): if component in download_hash_extract: hashes = _get_hash_by_arch(component, version) @@ -279,7 +288,7 @@ def download_hash(only_downloads: [str]) -> None: return (hash_file.content.decode().split()[0]) - for component, versions in new_versions.items(): + for component, versions in chain(new_versions.items(), hash_set_to_0.items()): c = component + '_checksums' for arch in components_supported_arch[component]: for version in versions: @@ -307,26 +316,24 @@ parser = argparse.ArgumentParser(description=f"Add new patch versions hashes in which means it won't add new major or minor versions. In order to add one of these, edit {CHECKSUMS_YML} by hand, adding the new versions with a patch number of 0 (or the lowest relevant patch versions) + and a hash value of 0. ; then run this script. Note that the script will try to add the versions on all architecture keys already present for a given download target. - The '0' value for a version hash is treated as a missing hash, so the script will try to download it again. - To notify a non-existing version (yanked, or upstream does not have monotonically increasing versions numbers), - use the special value 'NONE'. - EXAMPLES: crictl_checksums: ...
amd64: -+ v1.30.0: 0 - v1.29.0: d16a1ffb3938f5a19d5c8f45d363bd091ef89c0bc4d44ad16b933eede32fdcbb - v1.28.0: 8dc78774f7cbeaf787994d386eec663f0a3cf24de1ea4893598096cb39ef2508""" ++ 1.30.0: 0 + 1.29.0: d16a1ffb3938f5a19d5c8f45d363bd091ef89c0bc4d44ad16b933eede32fdcbb + 1.28.0: 8dc78774f7cbeaf787994d386eec663f0a3cf24de1ea4893598096cb39ef2508""" ) -parser.add_argument('binaries', nargs='*', choices=downloads.keys()) +parser.add_argument('binaries', nargs='*', choices=downloads.keys(), + help='if provided, only obtain hashes for these components') args = parser.parse_args() download_hash(args.binaries) From 81790cab91e3ea6a76a0ee11e7a10327e514c862 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 23 Dec 2024 14:17:17 +0100 Subject: [PATCH 19/30] download: remove unneeded imports --- scripts/download_hash.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 0f6cba92e..725c71cf0 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -9,7 +9,6 @@ import os from itertools import groupby, chain from more_itertools import partition -from collections import defaultdict from functools import cache import argparse import requests From b08c5e8b14d89c22524378fb9f81245d69679830 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Fri, 10 Jan 2025 10:41:27 +0100 Subject: [PATCH 20/30] download: Log GitHub rate-limit status --- scripts/download_hash.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 725c71cf0..1a2932cc6 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -6,6 +6,7 @@ import sys import os +import logging from itertools import groupby, chain from more_itertools import partition @@ -13,6 +14,7 @@ from functools import cache import argparse import requests import hashlib +from datetime import datetime from ruamel.yaml import YAML from packaging.version import Version, InvalidVersion @@ -20,6 +22,8 @@ from typing import Optional CHECKSUMS_YML = "../roles/kubespray-defaults/defaults/main/checksums.yml" +logger = logging.getLogger(__name__) + def open_checksums_yaml(): yaml = YAML() yaml.explicit_start = True @@ -207,7 +211,14 @@ def download_hash(only_downloads: [str]) -> None: "Authorization": f"Bearer {os.environ['API_KEY']}", } ) + if 'x-ratelimit-used' in response.headers._store: + logger.info("GitHub GraphQL API ratelimit status: used %s of %s.
Next reset at %s", + response.headers['X-RateLimit-Used'], + response.headers['X-RateLimit-Limit'], + datetime.fromtimestamp(int(response.headers["X-RateLimit-Reset"])) + ) response.raise_for_status() + def valid_version(possible_version: str) -> Optional[Version]: try: return Version(possible_version) From 4351b47ebe4f057b86c7936f71188fd5469787c2 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Sat, 11 Jan 2025 15:45:14 +0100 Subject: [PATCH 21/30] download: convert to logging --- scripts/download_hash.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 1a2932cc6..7b7c6e602 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -316,7 +316,7 @@ def download_hash(only_downloads: [str]) -> None: with open(CHECKSUMS_YML, "w") as checksums_yml: yaml.dump(data, checksums_yml) - print(f"\n\nUpdated {CHECKSUMS_YML}\n") + logger.info("Updated %s", CHECKSUMS_YML) parser = argparse.ArgumentParser(description=f"Add new patch versions hashes in {CHECKSUMS_YML}", formatter_class=argparse.RawTextHelpFormatter, From 9b56840d5154d091406710165c77f0b4e08cb260 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Sat, 11 Jan 2025 15:41:36 +0100 Subject: [PATCH 22/30] download: create pyproject.toml --- scripts/component_hash_update/pyproject.toml | 33 ++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 scripts/component_hash_update/pyproject.toml diff --git a/scripts/component_hash_update/pyproject.toml b/scripts/component_hash_update/pyproject.toml new file mode 100644 index 000000000..97894e485 --- /dev/null +++ b/scripts/component_hash_update/pyproject.toml @@ -0,0 +1,33 @@ +[build-system] +requires = ["setuptools >= 61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "kubespray_component_hash_update" +version = "1.0.0" +dependencies = [ + "more_itertools", + "ruamel.yaml", + "requests", + "packaging", +] + +requires-python = ">= 3.10" + +authors = [ + { name = "Craig Rodrigues", email = "rodrigc@crodrigues.org" }, + { name = "Simon Wessel" }, + { name = "Max Gautier", email = "mg@max.gautier.name" }, +] +maintainers = [ + { name = "The Kubespray maintainers" }, +] + +description = "Download or compute hashes for new versions of components deployed by Kubespray" + +classifiers = [ + "License :: OSI Approved :: Apache-2.0", +] + +[project.scripts] +update-hashes = "component_hash_update.download:main" From ba3258d7f0bd6a9cdd0c038f685c9815e2d8f01b Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Sat, 11 Jan 2025 15:46:19 +0100 Subject: [PATCH 23/30] Move download_hash.py into a python package Can operate on several branches without the need for backport --- .../src/component_hash_update/download.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{download_hash.py => component_hash_update/src/component_hash_update/download.py} (100%) diff --git a/scripts/download_hash.py b/scripts/component_hash_update/src/component_hash_update/download.py similarity index 100% rename from scripts/download_hash.py rename to scripts/component_hash_update/src/component_hash_update/download.py From a551922c84f8985f37549a79af311172dfba603d Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Sat, 11 Jan 2025 15:47:07 +0100 Subject: [PATCH 24/30] Adapt download.py to run as a package script --- .../src/component_hash_update/download.py | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/scripts/component_hash_update/src/component_hash_update/download.py 
b/scripts/component_hash_update/src/component_hash_update/download.py index 7b7c6e602..aa64d5982 100644 --- a/scripts/component_hash_update/src/component_hash_update/download.py +++ b/scripts/component_hash_update/src/component_hash_update/download.py @@ -318,32 +318,36 @@ def download_hash(only_downloads: [str]) -> None: yaml.dump(data, checksums_yml) logger.info("Updated %s", CHECKSUMS_YML) -parser = argparse.ArgumentParser(description=f"Add new patch versions hashes in {CHECKSUMS_YML}", - formatter_class=argparse.RawTextHelpFormatter, - epilog=f""" - This script only lookup new patch versions relative to those already existing - in the data in {CHECKSUMS_YML}, - which means it won't add new major or minor versions. - In order to add one of these, edit {CHECKSUMS_YML} - by hand, adding the new versions with a patch number of 0 (or the lowest relevant patch versions) - and a hash value of 0. - ; then run this script. - Note that the script will try to add the versions on all - architecture keys already present for a given download target. +def main(): - EXAMPLES: + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + parser = argparse.ArgumentParser(description=f"Add new patch versions hashes in {CHECKSUMS_YML}", + formatter_class=argparse.RawTextHelpFormatter, + epilog=f""" + This script only lookup new patch versions relative to those already existing + in the data in {CHECKSUMS_YML}, + which means it won't add new major or minor versions. + In order to add one of these, edit {CHECKSUMS_YML} + by hand, adding the new versions with a patch number of 0 (or the lowest relevant patch versions) + and a hash value of 0. + ; then run this script. - crictl_checksums: - ... - amd64: -+ 1.30.0: 0 - 1.29.0: d16a1ffb3938f5a19d5c8f45d363bd091ef89c0bc4d44ad16b933eede32fdcbb - 1.28.0: 8dc78774f7cbeaf787994d386eec663f0a3cf24de1ea4893598096cb39ef2508""" + Note that the script will try to add the versions on all + architecture keys already present for a given download target. -) -parser.add_argument('binaries', nargs='*', choices=downloads.keys(), - help='if provided, only obtain hashes for these compoments') + EXAMPLES: -args = parser.parse_args() -download_hash(args.binaries) + crictl_checksums: + ... 
+ amd64: + + 1.30.0: 0 + 1.29.0: d16a1ffb3938f5a19d5c8f45d363bd091ef89c0bc4d44ad16b933eede32fdcbb + 1.28.0: 8dc78774f7cbeaf787994d386eec663f0a3cf24de1ea4893598096cb39ef2508""" + + ) + parser.add_argument('binaries', nargs='*', choices=downloads.keys(), + help='if provided, only obtain hashes for these components') + + args = parser.parse_args() + download_hash(args.binaries) From 76e07daa12d76a227ddc96664bd715080f8034cb Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Sat, 11 Jan 2025 16:03:25 +0100 Subject: [PATCH 25/30] download: put GraphQL query in package + read from importlib --- scripts/component_hash_update/pyproject.toml | 4 +++- .../src/component_hash_update/__init__.py | 0 .../src/component_hash_update/download.py | 15 ++++++++------- .../component_hash_update}/list_releases.graphql | 0 4 files changed, 11 insertions(+), 8 deletions(-) create mode 100644 scripts/component_hash_update/src/component_hash_update/__init__.py rename scripts/{ => component_hash_update/src/component_hash_update}/list_releases.graphql (100%) diff --git a/scripts/component_hash_update/pyproject.toml b/scripts/component_hash_update/pyproject.toml index 97894e485..ddf27831a 100644 --- a/scripts/component_hash_update/pyproject.toml +++ b/scripts/component_hash_update/pyproject.toml @@ -1,5 +1,7 @@ [build-system] -requires = ["setuptools >= 61.0"] +requires = ["setuptools >= 61.0", + "setuptools_scm >= 8.0", +] build-backend = "setuptools.build_meta" [project] diff --git a/scripts/component_hash_update/src/component_hash_update/__init__.py b/scripts/component_hash_update/src/component_hash_update/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/scripts/component_hash_update/src/component_hash_update/download.py b/scripts/component_hash_update/src/component_hash_update/download.py index aa64d5982..0b54800ed 100644 --- a/scripts/component_hash_update/src/component_hash_update/download.py +++ b/scripts/component_hash_update/src/component_hash_update/download.py @@ -17,6 +17,7 @@ import hashlib from datetime import datetime from ruamel.yaml import YAML from packaging.version import Version, InvalidVersion +from importlib.resources import files from typing import Optional @@ -204,13 +205,13 @@ def download_hash(only_downloads: [str]) -> None: 'repoWithReleases': [r['graphql_id'] for r in releases.values()], 'repoWithTags': [t['graphql_id'] for t in tags.values()], } - with open("list_releases.graphql") as query: - response = s.post("https://api.github.com/graphql", - json={'query': query.read(), 'variables': ql_params}, - headers={ - "Authorization": f"Bearer {os.environ['API_KEY']}", - } - ) + response = s.post("https://api.github.com/graphql", + json={'query': files(__package__).joinpath('list_releases.graphql').read_text(), + 'variables': ql_params}, + headers={ + "Authorization": f"Bearer {os.environ['API_KEY']}", + } + ) if 'x-ratelimit-used' in response.headers._store: logger.info("GitHub GraphQL API ratelimit status: used %s of %s.
Next reset at %s", response.headers['X-RateLimit-Used'], diff --git a/scripts/list_releases.graphql b/scripts/component_hash_update/src/component_hash_update/list_releases.graphql similarity index 100% rename from scripts/list_releases.graphql rename to scripts/component_hash_update/src/component_hash_update/list_releases.graphql From 55cff4f3d32f8a00ae1f2e160d5cde67b7cfff8b Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Sun, 12 Jan 2025 14:40:23 +0100 Subject: [PATCH 26/30] download: get checksums file relative to git root This means the update-hashes command can be run anywhere in Kubespray repository without having to figure out the correct path. --- .../src/component_hash_update/download.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/scripts/component_hash_update/src/component_hash_update/download.py b/scripts/component_hash_update/src/component_hash_update/download.py index 0b54800ed..10705ddc6 100644 --- a/scripts/component_hash_update/src/component_hash_update/download.py +++ b/scripts/component_hash_update/src/component_hash_update/download.py @@ -7,6 +7,7 @@ import sys import os import logging +import subprocess from itertools import groupby, chain from more_itertools import partition @@ -18,20 +19,21 @@ from datetime import datetime from ruamel.yaml import YAML from packaging.version import Version, InvalidVersion from importlib.resources import files +from pathlib import Path from typing import Optional -CHECKSUMS_YML = "../roles/kubespray-defaults/defaults/main/checksums.yml" +CHECKSUMS_YML = Path("roles/kubespray-defaults/defaults/main/checksums.yml") logger = logging.getLogger(__name__) -def open_checksums_yaml(): +def open_yaml(file: Path): yaml = YAML() yaml.explicit_start = True yaml.preserve_quotes = True yaml.width = 4096 - with open(CHECKSUMS_YML, "r") as checksums_yml: + with open(file, "r") as checksums_yml: data = yaml.load(checksums_yml) return data, yaml @@ -182,7 +184,11 @@ def download_hash(only_downloads: [str]) -> None: }, } - data, yaml = open_checksums_yaml() + checksums_file = Path(subprocess.Popen(['git', 'rev-parse', '--show-toplevel'], + stdout=subprocess.PIPE).communicate()[0].rstrip().decode('utf-8') + ) / CHECKSUMS_YML + logger.info("Opening checksums file %s...", checksums_file) + data, yaml = open_yaml(checksums_file) s = requests.Session() @cache @@ -315,9 +321,9 @@ def download_hash(only_downloads: [str]) -> None: } - with open(CHECKSUMS_YML, "w") as checksums_yml: + with open(checksums_file, "w") as checksums_yml: yaml.dump(data, checksums_yml) - logger.info("Updated %s", CHECKSUMS_YML) + logger.info("Updated %s", checksums_file) def main(): From d17bd286ea0ba2d23232ca933743c84d6e216e0a Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 13 Jan 2025 14:17:11 +0100 Subject: [PATCH 27/30] download: allow excluding some component This is handy when some component releases is buggy (missing file at the download links) to not block everything else. Move the filtering up the stack so we don't have to do it multiples times. 
--- .../src/component_hash_update/download.py | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/scripts/component_hash_update/src/component_hash_update/download.py b/scripts/component_hash_update/src/component_hash_update/download.py index 10705ddc6..b31419b7f 100644 --- a/scripts/component_hash_update/src/component_hash_update/download.py +++ b/scripts/component_hash_update/src/component_hash_update/download.py @@ -21,7 +21,7 @@ from packaging.version import Version, InvalidVersion from importlib.resources import files from pathlib import Path -from typing import Optional +from typing import Optional, Any CHECKSUMS_YML = Path("roles/kubespray-defaults/defaults/main/checksums.yml") @@ -148,7 +148,7 @@ arch_alt_name = { # different verification methods (gpg, cosign) ( needs download role changes) (or verify the sig in this script and only use the checksum in the playbook) # perf improvements (async) -def download_hash(only_downloads: [str]) -> None: +def download_hash(downloads: {str: {str: Any}}) -> None: # Handle file with multiples hashes, with various formats. # the lambda is expected to produce a dictionary of hashes indexed by arch name download_hash_extract = { @@ -203,10 +203,7 @@ def download_hash(only_downloads: [str]) -> None: return download_hash_extract[download](hash_file.content.decode()) - releases, tags = map(dict, - partition(lambda r: r[1].get('tags', False), - {k: downloads[k] for k in (downloads.keys() & only_downloads)}.items() - )) + releases, tags = map(dict, partition(lambda r: r[1].get('tags', False), downloads.items())) ql_params = { 'repoWithReleases': [r['graphql_id'] for r in releases.values()], 'repoWithTags': [t['graphql_id'] for t in tags.values()], } @@ -353,8 +350,25 @@ def main(): 1.28.0: 8dc78774f7cbeaf787994d386eec663f0a3cf24de1ea4893598096cb39ef2508""" ) - parser.add_argument('binaries', nargs='*', choices=downloads.keys(), - help='if provided, only obtain hashes for these components') + + # Workaround for https://github.com/python/cpython/issues/53834#issuecomment-2060825835 + # Fixed in python 3.14 + class Choices(tuple): + + def __init__(self, _iterable=None, default=None): + self.default = default or [] + + def __contains__(self, item): + return super().__contains__(item) or item == self.default + + choices = Choices(downloads.keys(), default=list(downloads.keys())) + + parser.add_argument('only', nargs='*', choices=choices, + help='if provided, only obtain hashes for these components', + default=choices.default) + parser.add_argument('-e', '--exclude', action='append', choices=downloads.keys(), + help='do not obtain hashes for this component', + default=[]) args = parser.parse_args() - download_hash(args.binaries) + download_hash({k: downloads[k] for k in (set(args.only) - set(args.exclude))}) From 4d3f06e69e26ccef881fb09a48c9e9782a92322a Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 13 Jan 2025 16:53:34 +0100 Subject: [PATCH 28/30] download: cleanup graphQL query - remove unused parts in the response - clarify variable names --- .../src/component_hash_update/download.py | 14 +++++++------- .../component_hash_update/list_releases.graphql | 11 +++-------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/scripts/component_hash_update/src/component_hash_update/download.py b/scripts/component_hash_update/src/component_hash_update/download.py index b31419b7f..00e398b13 100644 --- a/scripts/component_hash_update/src/component_hash_update/download.py +++
b/scripts/component_hash_update/src/component_hash_update/download.py @@ -204,13 +204,13 @@ def download_hash(downloads: {str: {str: Any}}) -> None: releases, tags = map(dict, partition(lambda r: r[1].get('tags', False), downloads.items())) - ql_params = { - 'repoWithReleases': [r['graphql_id'] for r in releases.values()], - 'repoWithTags': [t['graphql_id'] for t in tags.values()], + repos = { + 'with_releases': [r['graphql_id'] for r in releases.values()], + 'with_tags': [t['graphql_id'] for t in tags.values()], } response = s.post("https://api.github.com/graphql", json={'query': files(__package__).joinpath('list_releases.graphql').read_text(), - 'variables': ql_params}, + 'variables': repos}, headers={ "Authorization": f"Bearer {os.environ['API_KEY']}", } ) @@ -228,7 +228,7 @@ def download_hash(downloads: {str: {str: Any}}) -> None: return Version(possible_version) except InvalidVersion: return None - rep = response.json()["data"] + repos = response.json()["data"] github_versions = dict(zip(chain(releases.keys(), tags.keys()), [ { @@ -236,13 +236,13 @@ def download_hash(downloads: {str: {str: Any}}) -> None: if not r["isPrerelease"] and (v := valid_version(r["tagName"])) is not None } - for repo in rep["with_releases"] + for repo in repos["with_releases"] ] + [ { v for t in repo["refs"]["nodes"] if (v := valid_version(t["name"].removeprefix('release-'))) is not None } - for repo in rep["with_tags"] + for repo in repos["with_tags"] ], strict=True)) diff --git a/scripts/component_hash_update/src/component_hash_update/list_releases.graphql b/scripts/component_hash_update/src/component_hash_update/list_releases.graphql index fb060db26..9d781458b 100644 --- a/scripts/component_hash_update/src/component_hash_update/list_releases.graphql +++ b/scripts/component_hash_update/src/component_hash_update/list_releases.graphql @@ -1,24 +1,19 @@ -query($repoWithReleases: [ID!]!, $repoWithTags: [ID!]!) { - with_releases: nodes(ids: $repoWithReleases) { +query($with_releases: [ID!]!, $with_tags: [ID!]!) { + with_releases: nodes(ids: $with_releases) { ... on Repository { - nameWithOwner releases(first: 100) { nodes { tagName isPrerelease - releaseAssets { - totalCount - } } } } } - with_tags: nodes(ids: $repoWithTags) { + with_tags: nodes(ids: $with_tags) { ... on Repository { - nameWithOwner refs(refPrefix: "refs/tags/", last: 25) { nodes { name From d8629b8e7e2b34df744906fdcbf87f27d0043fba Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Wed, 15 Jan 2025 14:32:49 +0100 Subject: [PATCH 29/30] download: separate static metadata into its own file By separating logic from data, we should make it easier to add new components. --- .../src/component_hash_update/components.py | 94 +++++++++++++++++++ .../src/component_hash_update/download.py | 92 +----------------- 2 files changed, 96 insertions(+), 90 deletions(-) create mode 100644 scripts/component_hash_update/src/component_hash_update/components.py diff --git a/scripts/component_hash_update/src/component_hash_update/components.py new file mode 100644 index 000000000..00855121b --- /dev/null +++ b/scripts/component_hash_update/src/component_hash_update/components.py @@ -0,0 +1,94 @@ +""" +Static download metadata for components updated by the update-hashes command. 
+""" + +infos = { + "calicoctl_binary": { + "url": "https://github.com/projectcalico/calico/releases/download/v{version}/SHA256SUMS", + "graphql_id": "R_kgDOA87D0g", + }, + "ciliumcli_binary": { + "url": "https://github.com/cilium/cilium-cli/releases/download/v{version}/cilium-{os}-{arch}.tar.gz.sha256sum", + "graphql_id": "R_kgDOE0nmLg", + }, + "cni_binary": { + "url": "https://github.com/containernetworking/plugins/releases/download/v{version}/cni-plugins-{os}-{arch}-v{version}.tgz.sha256", + "graphql_id": "R_kgDOBQqEpg", + }, + "containerd_archive": { + "url": "https://github.com/containerd/containerd/releases/download/v{version}/containerd-{version}-{os}-{arch}.tar.gz.sha256sum", + "graphql_id": "R_kgDOAr9FWA", + }, + "cri_dockerd_archive": { + "binary": True, + "url": "https://github.com/Mirantis/cri-dockerd/releases/download/v{version}/cri-dockerd-{version}.{arch}.tgz", + "graphql_id": "R_kgDOEvvLcQ", + }, + "crictl": { + "url": "https://github.com/kubernetes-sigs/cri-tools/releases/download/v{version}/crictl-v{version}-{os}-{arch}.tar.gz.sha256", + "graphql_id": "R_kgDOBMdURA", + }, + "crio_archive": { + "url": "https://storage.googleapis.com/cri-o/artifacts/cri-o.{arch}.v{version}.tar.gz.sha256sum", + "graphql_id": "R_kgDOBAr5pg", + }, + "crun": { + "url": "https://github.com/containers/crun/releases/download/{version}/crun-{version}-linux-{arch}", + "binary": True, + "graphql_id": "R_kgDOBip3vA", + }, + "etcd_binary": { + "url": "https://github.com/etcd-io/etcd/releases/download/v{version}/SHA256SUMS", + "graphql_id": "R_kgDOAKtHtg", + }, + "gvisor_containerd_shim_binary": { + "url": "https://storage.googleapis.com/gvisor/releases/release/{version}/{alt_arch}/containerd-shim-runsc-v1.sha512", + "hashtype": "sha512", + "tags": True, + "graphql_id": "R_kgDOB9IlXg", + }, + "gvisor_runsc_binary": { + "url": "https://storage.googleapis.com/gvisor/releases/release/{version}/{alt_arch}/runsc.sha512", + "hashtype": "sha512", + "tags": True, + "graphql_id": "R_kgDOB9IlXg", + }, + "kata_containers_binary": { + "url": "https://github.com/kata-containers/kata-containers/releases/download/{version}/kata-static-{version}-{arch}.tar.xz", + "binary": True, + "graphql_id": "R_kgDOBsJsHQ", + }, + "kubeadm": { + "url": "https://dl.k8s.io/release/v{version}/bin/linux/{arch}/kubeadm.sha256", + "graphql_id": "R_kgDOAToIkg", + }, + "kubectl": { + "url": "https://dl.k8s.io/release/v{version}/bin/linux/{arch}/kubectl.sha256", + "graphql_id": "R_kgDOAToIkg", + }, + "kubelet": { + "url": "https://dl.k8s.io/release/v{version}/bin/linux/{arch}/kubelet.sha256", + "graphql_id": "R_kgDOAToIkg", + }, + "nerdctl_archive": { + "url": "https://github.com/containerd/nerdctl/releases/download/v{version}/SHA256SUMS", + "graphql_id": "R_kgDOEvuRnQ", + }, + "runc": { + "url": "https://github.com/opencontainers/runc/releases/download/v{version}/runc.sha256sum", + "graphql_id": "R_kgDOAjP4QQ", + }, + "skopeo_binary": { + "url": "https://github.com/lework/skopeo-binary/releases/download/v{version}/skopeo-{os}-{arch}.sha256", + "graphql_id": "R_kgDOHQ6J9w", + }, + "youki": { + "url": "https://github.com/youki-dev/youki/releases/download/v{version}/youki-{version}-{alt_arch}-gnu.tar.gz", + "binary": True, + "graphql_id": "R_kgDOFPvgPg", + }, + "yq": { + "url": "https://github.com/mikefarah/yq/releases/download/v{version}/checksums-bsd", # see https://github.com/mikefarah/yq/pull/1691 for why we use this url + "graphql_id": "R_kgDOApOQGQ", + }, +} diff --git a/scripts/component_hash_update/src/component_hash_update/download.py 
b/scripts/component_hash_update/src/component_hash_update/download.py index 00e398b13..2e0b8e007 100644 --- a/scripts/component_hash_update/src/component_hash_update/download.py +++ b/scripts/component_hash_update/src/component_hash_update/download.py @@ -23,6 +23,8 @@ from pathlib import Path from typing import Optional, Any +from . import components + CHECKSUMS_YML = Path("roles/kubespray-defaults/defaults/main/checksums.yml") logger = logging.getLogger(__name__) @@ -38,96 +40,6 @@ def open_yaml(file: Path): return data, yaml -downloads = { - "calicoctl_binary": { - 'url': "https://github.com/projectcalico/calico/releases/download/v{version}/SHA256SUMS", - 'graphql_id': "R_kgDOA87D0g", - }, - "ciliumcli_binary": { - 'url': "https://github.com/cilium/cilium-cli/releases/download/v{version}/cilium-{os}-{arch}.tar.gz.sha256sum", - 'graphql_id': "R_kgDOE0nmLg" - }, - "cni_binary": { - 'url': "https://github.com/containernetworking/plugins/releases/download/v{version}/cni-plugins-{os}-{arch}-v{version}.tgz.sha256", - 'graphql_id': "R_kgDOBQqEpg", - }, - "containerd_archive": { - 'url': "https://github.com/containerd/containerd/releases/download/v{version}/containerd-{version}-{os}-{arch}.tar.gz.sha256sum", - 'graphql_id': "R_kgDOAr9FWA" - }, - "cri_dockerd_archive": { - 'binary': True, - 'url': "https://github.com/Mirantis/cri-dockerd/releases/download/v{version}/cri-dockerd-{version}.{arch}.tgz", - 'graphql_id': "R_kgDOEvvLcQ", - }, - "crictl": { - 'url': "https://github.com/kubernetes-sigs/cri-tools/releases/download/v{version}/crictl-v{version}-{os}-{arch}.tar.gz.sha256", - 'graphql_id': "R_kgDOBMdURA", - }, - "crio_archive": { - 'url':"https://storage.googleapis.com/cri-o/artifacts/cri-o.{arch}.v{version}.tar.gz.sha256sum", - 'graphql_id': "R_kgDOBAr5pg", - }, - "crun": { - 'url': "https://github.com/containers/crun/releases/download/{version}/crun-{version}-linux-{arch}", - 'binary': True, - 'graphql_id': "R_kgDOBip3vA", - }, - "etcd_binary": { - 'url': "https://github.com/etcd-io/etcd/releases/download/v{version}/SHA256SUMS", - 'graphql_id': "R_kgDOAKtHtg", - }, - "gvisor_containerd_shim_binary": { - 'url': "https://storage.googleapis.com/gvisor/releases/release/{version}/{alt_arch}/containerd-shim-runsc-v1.sha512", - 'hashtype': "sha512", - 'tags': True, - 'graphql_id': "R_kgDOB9IlXg", - }, - "gvisor_runsc_binary": { - 'url': "https://storage.googleapis.com/gvisor/releases/release/{version}/{alt_arch}/runsc.sha512", - 'hashtype': "sha512", - 'tags': True, - 'graphql_id': "R_kgDOB9IlXg", - }, - "kata_containers_binary": { - 'url': "https://github.com/kata-containers/kata-containers/releases/download/{version}/kata-static-{version}-{arch}.tar.xz", - 'binary': True, - 'graphql_id': "R_kgDOBsJsHQ", - }, - "kubeadm": { - 'url': "https://dl.k8s.io/release/v{version}/bin/linux/{arch}/kubeadm.sha256", - 'graphql_id': "R_kgDOAToIkg" - }, - "kubectl": { - 'url': "https://dl.k8s.io/release/v{version}/bin/linux/{arch}/kubectl.sha256", - 'graphql_id': "R_kgDOAToIkg" - }, - "kubelet": { - 'url': "https://dl.k8s.io/release/v{version}/bin/linux/{arch}/kubelet.sha256", - 'graphql_id': "R_kgDOAToIkg" - }, - "nerdctl_archive": { - 'url': "https://github.com/containerd/nerdctl/releases/download/v{version}/SHA256SUMS", - 'graphql_id': "R_kgDOEvuRnQ", - }, - "runc": { - 'url': "https://github.com/opencontainers/runc/releases/download/v{version}/runc.sha256sum", - 'graphql_id': "R_kgDOAjP4QQ", - }, - "skopeo_binary": { - 'url': 
"https://github.com/lework/skopeo-binary/releases/download/v{version}/skopeo-{os}-{arch}.sha256", - 'graphql_id': "R_kgDOHQ6J9w", - }, - "youki": { - 'url': "https://github.com/youki-dev/youki/releases/download/v{version}/youki-{version}-{alt_arch}-gnu.tar.gz", - 'binary': True, - 'graphql_id': "R_kgDOFPvgPg", - }, - "yq": { - 'url': "https://github.com/mikefarah/yq/releases/download/v{version}/checksums-bsd", # see https://github.com/mikefarah/yq/pull/1691 for why we use this url - 'graphql_id': "R_kgDOApOQGQ" - }, -} arch_alt_name = { "amd64": "x86_64", From bc36e9d440b1a0a77e232151d3f09dca006ed5fe Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Wed, 15 Jan 2025 14:34:48 +0100 Subject: [PATCH 30/30] hash-updater: apply formatter --- .../src/component_hash_update/download.py | 321 ++++++++++-------- 1 file changed, 185 insertions(+), 136 deletions(-) diff --git a/scripts/component_hash_update/src/component_hash_update/download.py b/scripts/component_hash_update/src/component_hash_update/download.py index 2e0b8e007..40a8e40a5 100644 --- a/scripts/component_hash_update/src/component_hash_update/download.py +++ b/scripts/component_hash_update/src/component_hash_update/download.py @@ -29,6 +29,7 @@ CHECKSUMS_YML = Path("roles/kubespray-defaults/defaults/main/checksums.yml") logger = logging.getLogger(__name__) + def open_yaml(file: Path): yaml = YAML() yaml.explicit_start = True @@ -60,45 +61,55 @@ arch_alt_name = { # different verification methods (gpg, cosign) ( needs download role changes) (or verify the sig in this script and only use the checksum in the playbook) # perf improvements (async) + def download_hash(downloads: {str: {str: Any}}) -> None: # Handle file with multiples hashes, with various formats. # the lambda is expected to produce a dictionary of hashes indexed by arch name download_hash_extract = { - "calicoctl_binary": lambda hashes : { - line.split('-')[-1] : line.split()[0] - for line in hashes.strip().split('\n') - if line.count('-') == 2 and line.split('-')[-2] == "linux" - }, - "etcd_binary": lambda hashes : { - line.split('-')[-1].removesuffix('.tar.gz') : line.split()[0] - for line in hashes.strip().split('\n') - if line.split('-')[-2] == "linux" - }, - "nerdctl_archive": lambda hashes : { - line.split()[1].removesuffix('.tar.gz').split('-')[3] : line.split()[0] - for line in hashes.strip().split('\n') - if [x for x in line.split(' ') if x][1].split('-')[2] == "linux" - }, - "runc": lambda hashes : { - parts[1].split('.')[1] : parts[0] - for parts in (line.split() - for line in hashes.split('\n')[3:9]) - }, - "yq": lambda rhashes_bsd : { - pair[0].split('_')[-1] : pair[1] - # pair = (yq__, ) - for pair in ((line.split()[1][1:-1], line.split()[3]) - for line in rhashes_bsd.splitlines() - if line.startswith("SHA256")) - if pair[0].startswith("yq") - and pair[0].split('_')[1] == "linux" - and not pair[0].endswith(".tar.gz") - }, - } + "calicoctl_binary": lambda hashes: { + line.split("-")[-1]: line.split()[0] + for line in hashes.strip().split("\n") + if line.count("-") == 2 and line.split("-")[-2] == "linux" + }, + "etcd_binary": lambda hashes: { + line.split("-")[-1].removesuffix(".tar.gz"): line.split()[0] + for line in hashes.strip().split("\n") + if line.split("-")[-2] == "linux" + }, + "nerdctl_archive": lambda hashes: { + line.split()[1].removesuffix(".tar.gz").split("-")[3]: line.split()[0] + for line in hashes.strip().split("\n") + if [x for x in line.split(" ") if x][1].split("-")[2] == "linux" + }, + "runc": lambda hashes: { + parts[1].split(".")[1]: parts[0] 
+ for parts in (line.split() for line in hashes.split("\n")[3:9]) + }, + "yq": lambda rhashes_bsd: { + pair[0].split("_")[-1]: pair[1] + # pair = (yq_<os>_<arch>, <hash>) + for pair in ( + (line.split()[1][1:-1], line.split()[3]) + for line in rhashes_bsd.splitlines() + if line.startswith("SHA256") + ) + if pair[0].startswith("yq") + and pair[0].split("_")[1] == "linux" + and not pair[0].endswith(".tar.gz") + }, + } - checksums_file = Path(subprocess.Popen(['git', 'rev-parse', '--show-toplevel'], - stdout=subprocess.PIPE).communicate()[0].rstrip().decode('utf-8') - ) / CHECKSUMS_YML + checksums_file = ( + Path( + subprocess.Popen( + ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE + ) + .communicate()[0] + .rstrip() + .decode("utf-8") + ) + / CHECKSUMS_YML + ) logger.info("Opening checksums file %s...", checksums_file) data, yaml = open_yaml(checksums_file) s = requests.Session() @@ -106,33 +117,40 @@ def download_hash(downloads: {str: {str: Any}}) -> None: @cache def _get_hash_by_arch(download: str, version: str) -> {str: str}: - hash_file = s.get(downloads[download]['url'].format( - version = version, - os = "linux", + hash_file = s.get( + downloads[download]["url"].format( + version=version, + os="linux", ), - allow_redirects=True) + allow_redirects=True, + ) hash_file.raise_for_status() return download_hash_extract[download](hash_file.content.decode()) - - releases, tags = map(dict, partition(lambda r: r[1].get('tags', False), downloads.items())) + releases, tags = map( + dict, partition(lambda r: r[1].get("tags", False), downloads.items()) + ) repos = { - 'with_releases': [r['graphql_id'] for r in releases.values()], - 'with_tags': [t['graphql_id'] for t in tags.values()], + "with_releases": [r["graphql_id"] for r in releases.values()], + "with_tags": [t["graphql_id"] for t in tags.values()], } - response = s.post("https://api.github.com/graphql", - json={'query': files(__package__).joinpath('list_releases.graphql').read_text(), - 'variables': repos}, - headers={ - "Authorization": f"Bearer {os.environ['API_KEY']}", - } - ) - if 'x-ratelimit-used' in response.headers._store: - logger.info("Github graphQL API ratelimit status: used %s of %s. Next reset at %s", - response.headers['X-RateLimit-Used'], - response.headers['X-RateLimit-Limit'], - datetime.fromtimestamp(int(response.headers["X-RateLimit-Reset"])) - ) + response = s.post( + "https://api.github.com/graphql", + json={ + "query": files(__package__).joinpath("list_releases.graphql").read_text(), + "variables": repos, + }, + headers={ + "Authorization": f"Bearer {os.environ['API_KEY']}", + }, + ) + if "x-ratelimit-used" in response.headers._store: + logger.info( + "Github graphQL API ratelimit status: used %s of %s. 
Next reset at %s", + response.headers["X-RateLimit-Used"], + response.headers["X-RateLimit-Limit"], + datetime.fromtimestamp(int(response.headers["X-RateLimit-Reset"])), + ) response.raise_for_status() def valid_version(possible_version: str) -> Optional[Version]: @@ -140,57 +158,76 @@ def download_hash(downloads: {str: {str: Any}}) -> None: return Version(possible_version) except InvalidVersion: return None + repos = response.json()["data"] - github_versions = dict(zip(chain(releases.keys(), tags.keys()), - [ - { - v for r in repo["releases"]["nodes"] - if not r["isPrerelease"] - and (v := valid_version(r["tagName"])) is not None - } - for repo in repos["with_releases"] - ] + - [ - { v for t in repo["refs"]["nodes"] - if (v := valid_version(t["name"].removeprefix('release-'))) is not None - } - for repo in repos["with_tags"] - ], - strict=True)) + github_versions = dict( + zip( + chain(releases.keys(), tags.keys()), + [ + { + v + for r in repo["releases"]["nodes"] + if not r["isPrerelease"] + and (v := valid_version(r["tagName"])) is not None + } + for repo in repos["with_releases"] + ] + + [ + { + v + for t in repo["refs"]["nodes"] + if (v := valid_version(t["name"].removeprefix("release-"))) + is not None + } + for repo in repos["with_tags"] + ], + strict=True, + ) + ) components_supported_arch = { - component.removesuffix('_checksums'): [a for a in archs.keys()] - for component, archs in data.items() - } + component.removesuffix("_checksums"): [a for a in archs.keys()] + for component, archs in data.items() + } new_versions = { - c: - {v for v in github_versions[c] - if any(v > version - and ( - (v.major, v.minor) == (version.major, version.minor) - or c.startswith('gvisor') - ) - for version in [max(minors) for _, minors in groupby(cur_v, lambda v: (v.minor, v.major))] - ) - # only get: - # - patch versions (no minor or major bump) (exception for gvisor which does not have a major.minor.patch scheme - # - newer ones (don't get old patch version) - } - - set(cur_v) - for component, archs in data.items() - if (c := component.removesuffix('_checksums')) in downloads.keys() - # this is only to bound cur_v in the scope - and (cur_v := sorted(Version(str(k)) for k in next(archs.values().__iter__()).keys())) + c: { + v + for v in github_versions[c] + if any( + v > version + and ( + (v.major, v.minor) == (version.major, version.minor) + or c.startswith("gvisor") + ) + for version in [ + max(minors) + for _, minors in groupby(cur_v, lambda v: (v.minor, v.major)) + ] + ) + # only get: + # - patch versions (no minor or major bump) (exception for gvisor which does not have a major.minor.patch scheme + # - newer ones (don't get old patch version) } + - set(cur_v) + for component, archs in data.items() + if (c := component.removesuffix("_checksums")) in downloads.keys() + # this is only to bound cur_v in the scope + and ( + cur_v := sorted( + Version(str(k)) for k in next(archs.values().__iter__()).keys() + ) + ) + } hash_set_to_0 = { - c: { - Version(str(v)) for v, h in chain.from_iterable(a.items() for a in archs.values()) - if h == 0 - } - for component, archs in data.items() - if (c := component.removesuffix('_checksums')) in downloads.keys() - } + c: { + Version(str(v)) + for v, h in chain.from_iterable(a.items() for a in archs.values()) + if h == 0 + } + for component, archs in data.items() + if (c := component.removesuffix("_checksums")) in downloads.keys() + } def get_hash(component: str, version: Version, arch: str): if component in download_hash_extract: @@ -198,37 +235,38 @@ def 
download_hash(downloads: {str: {str: Any}}) -> None: return hashes[arch] else: hash_file = s.get( - downloads[component]['url'].format( - version = version, - os = "linux", - arch = arch, - alt_arch = arch_alt_name[arch], - ), - allow_redirects=True) + downloads[component]["url"].format( + version=version, + os="linux", + arch=arch, + alt_arch=arch_alt_name[arch], + ), + allow_redirects=True, + ) hash_file.raise_for_status() - if downloads[component].get('binary', False): + if downloads[component].get("binary", False): return hashlib.new( - downloads[component].get('hashtype', 'sha256'), - hash_file.content - ).hexdigest() - return (hash_file.content.decode().split()[0]) - + downloads[component].get("hashtype", "sha256"), hash_file.content + ).hexdigest() + return hash_file.content.decode().split()[0] for component, versions in chain(new_versions.items(), hash_set_to_0.items()): - c = component + '_checksums' + c = component + "_checksums" for arch in components_supported_arch[component]: for version in versions: - data[c][arch][str(version)] = f"{downloads[component].get('hashtype', 'sha256')}:{get_hash(component, version, arch)}" - - data[c] = {arch : - {v : - versions[v] for v in sorted(versions.keys(), - key=lambda v: Version(str(v)), - reverse=True) - } - for arch, versions in data[c].items() - } + data[c][arch][ + str(version) + ] = f"{downloads[component].get('hashtype', 'sha256')}:{get_hash(component, version, arch)}" + data[c] = { + arch: { + v: versions[v] + for v in sorted( + versions.keys(), key=lambda v: Version(str(v)), reverse=True + ) + } + for arch, versions in data[c].items() + } with open(checksums_file, "w") as checksums_yml: yaml.dump(data, checksums_yml) @@ -238,9 +276,10 @@ def download_hash(downloads: {str: {str: Any}}) -> None: def main(): logging.basicConfig(stream=sys.stdout, level=logging.INFO) - parser = argparse.ArgumentParser(description=f"Add new patch versions hashes in {CHECKSUMS_YML}", - formatter_class=argparse.RawTextHelpFormatter, - epilog=f""" + parser = argparse.ArgumentParser( + description=f"Add new patch versions hashes in {CHECKSUMS_YML}", + formatter_class=argparse.RawTextHelpFormatter, + epilog=f""" This script only lookup new patch versions relative to those already existing in the data in {CHECKSUMS_YML}, which means it won't add new major or minor versions. 
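As a rough sketch of that patch-versions-only policy (hypothetical version numbers; the real logic is the new_versions comprehension earlier in this patch):

from packaging.version import Version

existing = [Version("1.28.0"), Version("1.29.1")]  # already in checksums.yml
candidates = [Version("1.28.2"), Version("1.29.0"), Version("1.30.0")]  # from GitHub
latest_per_minor = {}
for v in existing:
    key = (v.major, v.minor)
    latest_per_minor[key] = max(v, latest_per_minor.get(key, v))
new = [
    v for v in candidates
    # keep only strictly newer patches of minor versions we already track
    if (cur := latest_per_minor.get((v.major, v.minor))) is not None and v > cur
]
print([str(v) for v in new])  # ['1.28.2']: 1.29.0 is older, 1.30.0 is a new minor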
@@ -259,8 +298,7 @@ def main(): amd64: + 1.30.0: 0 1.29.0: d16a1ffb3938f5a19d5c8f45d363bd091ef89c0bc4d44ad16b933eede32fdcbb - 1.28.0: 8dc78774f7cbeaf787994d386eec663f0a3cf24de1ea4893598096cb39ef2508""" - + 1.28.0: 8dc78774f7cbeaf787994d386eec663f0a3cf24de1ea4893598096cb39ef2508""", ) # Workaround for https://github.com/python/cpython/issues/53834#issuecomment-2060825835 @@ -273,14 +311,25 @@ def main(): def __contains__(self, item): return super().__contains__(item) or item == self.default - choices = Choices(downloads.keys(), default=list(downloads.keys())) + choices = Choices(components.infos.keys(), default=list(components.infos.keys())) - parser.add_argument('only', nargs='*', choices=choices, - help='if provided, only obtain hashes for these components', - default=choices.default) - parser.add_argument('-e', '--exclude', action='append', choices=downloads.keys(), - help='do not obtain hashes for this component', - default=[]) + parser.add_argument( + "only", + nargs="*", + choices=choices, + help="if provided, only obtain hashes for these components", + default=choices.default, + ) + parser.add_argument( + "-e", + "--exclude", + action="append", + choices=components.infos.keys(), + help="do not obtain hashes for this component", + default=[], + ) args = parser.parse_args() - download_hash({k: downloads[k] for k in (set(args.only) - set(args.exclude))}) + download_hash( + {k: components.infos[k] for k in (set(args.only) - set(args.exclude))} + )
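To illustrate what the components.py split buys, a hypothetical data-only addition (the component name, URL, and node ID below are made up; the real dict is components.infos):

infos = {}  # stands in for components.infos, which already holds every component

infos["example_binary"] = {
    "binary": True,  # tells download.py to hash the downloaded artifact itself
    "url": "https://example.com/releases/v{version}/example-{os}-{arch}",
    "graphql_id": "R_kgDO_EXAMPLE",  # repository node ID from the GitHub GraphQL API
}

Adding such an entry should require no change to download.py.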