about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2024-01-29 02:50:57 +0000
committer bors <bors@rust-lang.org> 2024-01-29 02:50:57 +0000
commit 7e43442eb63f927ea1bfde62fe5f1f485e842291 (patch)
tree c052f13c7136e271e2661cf90e5173aeb452b8e8
parent 8a0b5ae1996721b38ca84af0b81a36e83d0d0c2c (diff)
parent 09e0d4f89a2c591b0d131fe0ddc8dd7bd7f20092 (diff)
download rust-7e43442eb63f927ea1bfde62fe5f1f485e842291.tar.gz
rust-7e43442eb63f927ea1bfde62fe5f1f485e842291.zip
Auto merge of #119290 - Kobzol:ci-docker-registry-cache, r=Mark-simulacrum
Cache CI Docker images in ghcr registry

This PR changes the way `rust-lang` caches Docker images used in CI workflows. Previously, the intermediate Docker layers were manually exported from `docker history` and backed up in S3. However, this approach no longer works with the Docker version used by GitHub Actions since August 2023. We had to disable Docker BuildKit to keep the old caching working, but this workaround will eventually stop working once GitHub updates Docker again and the old build backend is removed.

This PR changes the caching to use [Docker caching](https://docs.docker.com/build/cache/) instead. There are several backends for the cache; for our use case, S3 and the Docker registry make sense. This PR uses the Docker registry backend with the ghcr.io registry.

The caching creates a Docker image labeled `rust-ci`, which is currently stored in the `ghcr.io/rust-lang-ci` package registry. This image appears [here](https://ghcr.io/rust-lang-ci/rust-ci). The image is stored in `rust-lang-ci` and not `rust-lang`, because `try` and `auto` builds run in the context of that repository, so the `GITHUB_TOKEN` used there has permissions for it (unlike for `rust-lang`).

For pull request CI runs, the provided `GITHUB_TOKEN` reduces its permissions automatically to `packages: read`, which means that we won't be able to write the Docker image. If we're not able to write, we won't have anything to read. So I disabled the caching entirely for PR runs (it makes it slightly faster to build the Docker image if we don't have to deal with exporting and using a separate build driver). Note that before this PR, we also weren't able to read or write the cache on PR runs.

The rustup part of this change is [here](https://github.com/rust-lang/rustup/pull/3648).

Related issue: https://github.com/rust-lang/infra-team/issues/81

r? `@Mark-Simulacrum`
-rw-r--r-- .github/workflows/ci.yml 4
-rwxr-xr-x src/ci/docker/run.sh 116
-rw-r--r-- src/ci/github-actions/ci.yml 2
3 files changed, 64 insertions, 58 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index caf97abf78d..853f43e15e3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,6 +28,7 @@ name: CI
       - "**"
 permissions:
   contents: read
+  packages: write
 defaults:
   run:
     shell: bash
@@ -42,6 +43,7 @@ jobs:
       CI_JOB_NAME: "${{ matrix.name }}"
       CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
       HEAD_SHA: "${{ github.event.pull_request.head.sha || github.sha }}"
+      DOCKER_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
       SCCACHE_BUCKET: rust-lang-ci-sccache2
       TOOLSTATE_REPO: "https://github.com/rust-lang-nursery/rust-toolstate"
       CACHE_DOMAIN: ci-caches.rust-lang.org
@@ -172,6 +174,7 @@ jobs:
       CI_JOB_NAME: "${{ matrix.name }}"
       CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
       HEAD_SHA: "${{ github.event.pull_request.head.sha || github.sha }}"
+      DOCKER_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
       SCCACHE_BUCKET: rust-lang-ci-sccache2
       DEPLOY_BUCKET: rust-lang-ci2
       TOOLSTATE_REPO: "https://github.com/rust-lang-nursery/rust-toolstate"
@@ -554,6 +557,7 @@ jobs:
       CI_JOB_NAME: "${{ matrix.name }}"
       CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
       HEAD_SHA: "${{ github.event.pull_request.head.sha || github.sha }}"
+      DOCKER_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
       SCCACHE_BUCKET: rust-lang-ci-sccache2
       DEPLOY_BUCKET: rust-lang-ci2
       TOOLSTATE_REPO: "https://github.com/rust-lang-nursery/rust-toolstate"
diff --git a/src/ci/docker/run.sh b/src/ci/docker/run.sh
index 55eed95492d..0db61204f77 100755
--- a/src/ci/docker/run.sh
+++ b/src/ci/docker/run.sh
@@ -74,25 +74,6 @@ if [ -f "$docker_dir/$image/Dockerfile" ]; then
 
       cksum=$(sha512sum $hash_key | \
         awk '{print $1}')
-
-      url="https://$CACHE_DOMAIN/docker/$cksum"
-
-      echo "Attempting to download $url"
-      rm -f /tmp/rustci_docker_cache
-      set +e
-      retry curl --max-time 600 -y 30 -Y 10 --connect-timeout 30 -f -L -C - \
-        -o /tmp/rustci_docker_cache "$url"
-
-      docker_archive_hash=$(sha512sum /tmp/rustci_docker_cache | awk '{print $1}')
-      echo "Downloaded archive hash: ${docker_archive_hash}"
-
-      echo "Loading images into docker"
-      # docker load sometimes hangs in the CI, so time out after 10 minutes with TERM,
-      # KILL after 12 minutes
-      loaded_images=$(/usr/bin/timeout -k 720 600 docker load -i /tmp/rustci_docker_cache \
-        | sed 's/.* sha/sha/')
-      set -e
-      printf "Downloaded containers:\n$loaded_images\n"
     fi
 
     dockerfile="$docker_dir/$image/Dockerfile"
@@ -103,46 +84,65 @@ if [ -f "$docker_dir/$image/Dockerfile" ]; then
         context="$script_dir"
     fi
     echo "::group::Building docker image for $image"
-
-    # As of August 2023, Github Actions have updated Docker to 23.X,
-    # which uses the BuildKit by default. It currently throws aways all
-    # intermediate layers, which breaks our usage of S3 layer caching.
-    # Therefore we opt-in to the old build backend for now.
-    export DOCKER_BUILDKIT=0
-    retry docker \
-      build \
-      --rm \
-      -t rust-ci \
-      -f "$dockerfile" \
-      "$context"
-    echo "::endgroup::"
-
-    if [ "$CI" != "" ]; then
-      s3url="s3://$SCCACHE_BUCKET/docker/$cksum"
-      upload="aws s3 cp - $s3url"
-      digest=$(docker inspect rust-ci --format '{{.Id}}')
-      echo "Built container $digest"
-      if ! grep -q "$digest" <(echo "$loaded_images"); then
-        echo "Uploading finished image $digest to $url"
-        set +e
-        # Print image history for easier debugging of layer SHAs
-        docker history rust-ci
-        docker history -q rust-ci | \
-          grep -v missing | \
-          xargs docker save | \
-          gzip | \
-          $upload
-        set -e
-      else
-        echo "Looks like docker image is the same as before, not uploading"
-      fi
-      # Record the container image for reuse, e.g. by rustup.rs builds
-      info="$dist/image-$image.txt"
-      mkdir -p "$dist"
-      echo "$url" >"$info"
-      echo "$digest" >>"$info"
-      cat "$info"
+    echo "Image input"
+    cat $hash_key
+    echo "Image input checksum ${cksum}"
+
+    # Print docker version
+    docker --version
+
+    # On non-CI or PR jobs, we don't have permissions to write to the registry cache, so we should
+    # not use `docker login` nor caching.
+    if [[ "$CI" == "" ]] || [[ "$PR_CI_JOB" == "1" ]];
+    then
+        retry docker build --rm -t rust-ci -f "$dockerfile" "$context"
+    else
+        REGISTRY=ghcr.io
+        # Most probably rust-lang-ci, but in general the owner of the repository where CI runs
+        REGISTRY_USERNAME=${GITHUB_REPOSITORY_OWNER}
+        # Tag used to push the final Docker image, so that it can be pulled by e.g. rustup
+        IMAGE_TAG=${REGISTRY}/${REGISTRY_USERNAME}/rust-ci:${cksum}
+        # Tag used to cache the Docker build
+        # It seems that it cannot be the same as $IMAGE_TAG, otherwise it overwrites the cache
+        CACHE_IMAGE_TAG=${REGISTRY}/${REGISTRY_USERNAME}/rust-ci-cache:${cksum}
+
+        # Log into the Docker registry, so that we can read/write cache and the final image
+        echo ${DOCKER_TOKEN} | docker login ${REGISTRY} \
+            --username ${REGISTRY_USERNAME} \
+            --password-stdin
+
+        # Enable a new Docker driver so that --cache-from/to works with a registry backend
+        docker buildx create --use --driver docker-container
+
+        # Build the image using registry caching backend
+        retry docker \
+          buildx \
+          build \
+          --rm \
+          -t rust-ci \
+          -f "$dockerfile" \
+          --cache-from type=registry,ref=${CACHE_IMAGE_TAG} \
+          --cache-to type=registry,ref=${CACHE_IMAGE_TAG},compression=zstd \
+          --output=type=docker \
+          "$context"
+
+        # Print images for debugging purposes
+        docker images
+
+        # Tag the built image and push it to the registry
+        docker tag rust-ci "${IMAGE_TAG}"
+        docker push "${IMAGE_TAG}"
+
+        # Record the container registry tag/url for reuse, e.g. by rustup.rs builds
+        # It should be possible to run `docker pull <$IMAGE_TAG>` to download the image
+        info="$dist/image-$image.txt"
+        mkdir -p "$dist"
+        echo "${IMAGE_TAG}" > "$info"
+        cat "$info"
+
+        echo "To download the image, run docker pull ${IMAGE_TAG}"
     fi
+    echo "::endgroup::"
 elif [ -f "$docker_dir/disabled/$image/Dockerfile" ]; then
     if isCI; then
         echo Cannot run disabled images on CI!
diff --git a/src/ci/github-actions/ci.yml b/src/ci/github-actions/ci.yml
index 68a3afc910f..ceee4690004 100644
--- a/src/ci/github-actions/ci.yml
+++ b/src/ci/github-actions/ci.yml
@@ -34,6 +34,7 @@ x--expand-yaml-anchors--remove:
     CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
     # commit of PR sha or commit sha. `GITHUB_SHA` is not accurate for PRs.
     HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+    DOCKER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
   - &public-variables
     SCCACHE_BUCKET: rust-lang-ci-sccache2
@@ -301,6 +302,7 @@ on:
 
 permissions:
   contents: read
+  packages: write
 
 defaults:
   run: