diff --git a/linux/build/docker/Dockerfile.builder b/linux/build/docker/Dockerfile.builder index 58f98ae..83ed5e8 100644 --- a/linux/build/docker/Dockerfile.builder +++ b/linux/build/docker/Dockerfile.builder @@ -1,51 +1,78 @@ -# SilverMetal Linux — reproducible-build runner image. +# SilverMetal Linux — reproducible-build runner image (systemd-in-container). # -# This image is the "build host" for the ISO. Pinning it by digest is the -# only thing keeping host-toolchain drift out of the reproducibility gate, so -# do NOT replace the FROM line with a tag-only reference. +# This image is the "build host" for the ISO. derivative-maker's build +# steps assume a real systemd-managed Debian host — they call +# `systemctl restart approx-...`, `systemctl daemon-reload`, etc. and +# expect the services they configure to actually run. The clean way to +# satisfy that without a per-service whack-a-mole is to follow upstream's +# own container pattern: systemd as PID 1 inside the container, with an +# entrypoint that records the user-supplied command, masks irrelevant +# units, and execs systemd. systemd then runs `docker-entrypoint.service` +# which executes the recorded command, propagating its exit code back to +# `docker run`. # -# Build & push (run from repo root): +# The vendored systemd entrypoint files live in +# linux/build/docker/systemd-entrypoint/ and are pinned 1:1 to upstream's +# pattern (see that directory's README for the bump procedure). +# +# Pinning by digest is the only thing keeping host-toolchain drift out +# of the reproducibility gate, so do NOT replace the FROM line with a +# tag-only reference. 
+# +# Build & push (run on 10.0.0.51 — never on the WSL/aarch64 dev box): # docker build \ # -f linux/build/docker/Dockerfile.builder \ -# -t docker-registry:5000/silvermetal-builder:<tag> \ -# -t docker-registry:5000/silvermetal-builder:latest \ +# -t docker-registry.silverlabs.uk/silvermetal-builder:<tag> \ +# -t docker-registry.silverlabs.uk/silvermetal-builder:latest \ # linux/build/docker -# docker push docker-registry:5000/silvermetal-builder:<tag> +# docker push docker-registry.silverlabs.uk/silvermetal-builder:<tag> # # To bump the base image: replace the digest, rebuild, push, update # BUILDER_IMAGE in linux/build/scripts/build.sh, run a full reproducibility -# check, commit all four changes together. +# check, commit all the changes together. # debian:trixie-slim — pinned by digest. -# Resolved 2026-05-07 via `docker pull debian:trixie-slim` on the runner host. -# Trixie (Debian 13) is what the pinned derivative-maker tag expects; its -# 1100_sanity-tests reads /etc/os-release and exits if the codename is -# anything other than `trixie`. Upstream's own derivative-maker/docker/ -# Dockerfile uses the same FROM. Bumping this requires rebuilding + -# pushing the silvermetal-builder image AND updating BUILDER_IMAGE in -# linux/build/scripts/build.sh in the same commit. +# Resolved 2026-05-07 via `docker pull debian:trixie-slim` on 10.0.0.51. FROM debian:trixie-slim@sha256:cedb1ef40439206b673ee8b33a46a03a0c9fa90bf3732f54704f99cb061d2c5a -# Reproducibility-friendly apt configuration. ENV DEBIAN_FRONTEND=noninteractive \ LC_ALL=C.UTF-8 \ LANG=C.UTF-8 \ - SOURCE_DATE_EPOCH=0 + SOURCE_DATE_EPOCH=0 \ + USER=user \ + HOME=/home/user \ + container=docker # Pinned package versions. These come from the same snapshot.debian.org -# timestamp as the ISO build, so a Dockerfile rebuild against that snapshot -# produces the same toolchain bit-for-bit. The actual snapshot URL is -# substituted at build time via --build-arg APT_SNAPSHOT_URL=...
+# timestamp as the ISO build, so a Dockerfile rebuild against that +# snapshot produces the same toolchain bit-for-bit. ARG APT_SNAPSHOT_URL="https://snapshot.debian.org/archive/debian/20260415T000000Z" ARG APT_SECURITY_SNAPSHOT_URL="https://snapshot.debian.org/archive/debian-security/20260415T000000Z" # Two-phase install: -# 1. Use the base image's default mirror to seed ca-certificates so HTTPS -# to snapshot.debian.org works. (slim images don't ship CA bundles.) +# 1. Use the base image's default mirror to seed ca-certificates so +# HTTPS to snapshot.debian.org works. (slim images don't ship +# CA bundles by default.) # 2. Pin sources.list to the snapshot and install the actual toolchain. -# The first phase touches deb.debian.org without a pin; that's fine because -# nothing it installs ends up in the final ISO — only the toolchain installed -# in phase 2 does, and that is fully snapshot-pinned. +# Phase 1 touches deb.debian.org without a pin; that's fine because +# nothing it installs ends up in the final ISO — only the toolchain +# installed in phase 2 does, and that is fully snapshot-pinned. +# +# Package set explanation: +# - systemd / systemd-sysv / dbus / dbus-user-session +# systemd-in-container runtime; PID 1 of the build container +# - sq sqv sqop sequoia-git sequoia-chameleon-gnupg gpg-agent +# upstream's commit-signature verification stack (sq-git etc.) 
+# - approx +# package proxy started by 1200_prepare-build-machine; with +# systemd as PID 1, the .socket / @.service units actually fire +# - dpkg-dev fakeroot fasttrack-archive-keyring safe-rm adduser sudo +# ca-certificates git gnupg time curl lsb-release +# baseline tools derivative-maker assumes are present +# - debootstrap diffoscope-minimal dosfstools isolinux live-build +# mtools reprepro rsync squashfs-tools syslinux-common xorriso +# SilverMetal ISO toolchain (live-build chain + diff for the +# reproducibility gate) RUN set -eux; \ apt-get update; \ apt-get install -y --no-install-recommends ca-certificates; \ @@ -54,58 +81,57 @@ RUN set -eux; \ printf 'deb [check-valid-until=no] %s trixie-security main\n' "$APT_SECURITY_SNAPSHOT_URL" >> /etc/apt/sources.list; \ apt-get -o Acquire::Check-Valid-Until=false update; \ apt-get install -y --no-install-recommends \ + systemd \ + systemd-sysv \ + dbus \ + dbus-user-session \ + sq \ + sqv \ + sqop \ + sequoia-git \ + sequoia-chameleon-gnupg \ + gpg-agent \ + approx \ + adduser \ + ca-certificates \ + curl \ + dpkg-dev \ + fakeroot \ + fasttrack-archive-keyring \ + git \ + gnupg \ + lsb-release \ + safe-rm \ + sudo \ + time \ debootstrap \ diffoscope-minimal \ dosfstools \ - fakeroot \ - git \ - gnupg \ - gpg-agent \ isolinux \ live-build \ mtools \ reprepro \ rsync \ - sequoia-chameleon-gnupg \ - sequoia-git \ - sq \ - sqop \ - sqv \ squashfs-tools \ - sudo \ syslinux-common \ xorriso; \ apt-get clean; \ rm -rf /var/lib/apt/lists/* -# systemctl no-op shim. -# derivative-maker's build steps call `sudo systemctl daemon-reload` / -# `systemctl restart approx` / etc. as part of host-machine preparation, -# assuming systemd is PID 1 on the build host. Upstream's own container -# image runs systemd-in-container; we don't, so any real systemctl call -# would fail.
The shim returns success for every invocation and logs -# what was attempted, which is the standard pattern for running -# systemd-aware build scripts in transient containers without actual -# systemd. /usr/local/bin precedes /usr/bin in both default $PATH and -# sudo's secure_path, so this masks any real systemctl that might land -# later via package install. -RUN printf '%s\n' '#!/bin/sh' \ - '# systemctl no-op shim for systemd-less build containers.' \ - '# Logs the attempt to stderr and returns success.' \ - 'echo "systemctl-shim: $*" >&2' \ - 'exit 0' \ - > /usr/local/bin/systemctl \ - && chmod 0755 /usr/local/bin/systemctl +# Non-root build user with passwordless sudo. +# Naming matches upstream's docker setup (USER=user, HOME=/home/user) +# so derivative-maker's docker-aware checks line up. +RUN adduser --quiet --disabled-password --home "${HOME}" --gecos "${USER},,,," "${USER}" \ + && printf '%s\n' "${USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/passwordless_sudo \ + && chmod 440 /etc/sudoers.d/passwordless_sudo -# Non-root user for derivative-maker. -# Kicksecure's derivative-maker explicitly refuses to run as root and uses -# sudo internally for its privileged operations (debootstrap, mksquashfs, -# chroot mounts). build.sh chowns the workspace to this user inside the -# container, then runuser's to it before invoking derivative-maker. -# uid 1000 is conventional and plays nicely with bind mounts of files -# created by other Linux tools. -RUN useradd --uid 1000 --create-home --shell /bin/bash builder \ - && echo 'builder ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/builder \ - && chmod 440 /etc/sudoers.d/builder +# systemd-in-container entrypoint — vendored from upstream's +# derivative-maker/docker/. See systemd-entrypoint/README.md. 
+COPY systemd-entrypoint/entrypoint.sh /usr/local/bin/entrypoint.sh +COPY systemd-entrypoint/docker-entrypoint.service /etc/systemd/system/docker-entrypoint.service +COPY systemd-entrypoint/docker-entrypoint.target /etc/systemd/system/docker-entrypoint.target +COPY systemd-entrypoint/docker-entrypoint-stop.sh /usr/bin/docker-entrypoint-stop.sh +RUN chmod +x /usr/local/bin/entrypoint.sh /usr/bin/docker-entrypoint-stop.sh -WORKDIR /work +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD ["/bin/bash"] diff --git a/linux/build/docker/systemd-entrypoint/README.md b/linux/build/docker/systemd-entrypoint/README.md new file mode 100644 index 0000000..2fe3ca0 --- /dev/null +++ b/linux/build/docker/systemd-entrypoint/README.md @@ -0,0 +1,32 @@ +# Vendored systemd-in-container entrypoint + +These four files are copied verbatim from +`linux/build/derivative-maker/docker/` (entrypoint.sh and the build-data/ +systemd units). They implement upstream Kicksecure's pattern for +running derivative-maker inside a container that uses systemd as PID 1 +— required because derivative-maker's build steps assume a real +systemd-managed host (calls `systemctl restart approx-...`, +`daemon-reload`, etc., and expects those services to actually run). + +## Why vendored, not COPY'd from the submodule path + +The `Dockerfile.builder` build context is `linux/build/docker/`; COPY +cannot reach above that. Vendoring is small (4 files, ~3 KB total) and +gives us a single point to bump if upstream changes the pattern. 
+ +## Bump procedure + +When the `derivative-maker` submodule pin is bumped (see +`derivative-maker.PIN.md`), re-vendor by re-copying: + + cp linux/build/derivative-maker/docker/entrypoint.sh \ + linux/build/docker/systemd-entrypoint/entrypoint.sh + cp linux/build/derivative-maker/docker/build-data/docker-entrypoint.service \ + linux/build/docker/systemd-entrypoint/ + cp linux/build/derivative-maker/docker/build-data/docker-entrypoint.target \ + linux/build/docker/systemd-entrypoint/ + cp linux/build/derivative-maker/docker/build-data/docker-entrypoint-stop.sh \ + linux/build/docker/systemd-entrypoint/ + +Source upstream tag at last vendor: `18.1.7.4-developers-only` +(2026-05-07). diff --git a/linux/build/docker/systemd-entrypoint/docker-entrypoint-stop.sh b/linux/build/docker/systemd-entrypoint/docker-entrypoint-stop.sh new file mode 100644 index 0000000..c149ccc --- /dev/null +++ b/linux/build/docker/systemd-entrypoint/docker-entrypoint-stop.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +## Copyright (C) 2025 - 2025 ENCRYPTED SUPPORT LLC +## See the file COPYING for copying conditions. + +set -e + +## EXIT_STATUS is set by systemd. +## +## EXIT_STATUS is either an exit code integer or a signal name string, see +## systemd.exec(5) +if echo "${EXIT_STATUS}" | grep [A-Z] > /dev/null; then + 1>&2 printf '%s\n' "got signal ${EXIT_STATUS}" + systemctl exit $(( 128 + $( kill -l "${EXIT_STATUS}" ) )) +else + systemctl exit "${EXIT_STATUS}" +fi diff --git a/linux/build/docker/systemd-entrypoint/docker-entrypoint.service b/linux/build/docker/systemd-entrypoint/docker-entrypoint.service new file mode 100644 index 0000000..0282632 --- /dev/null +++ b/linux/build/docker/systemd-entrypoint/docker-entrypoint.service @@ -0,0 +1,17 @@ +## Copyright (C) 2025 - 2025 ENCRYPTED SUPPORT LLC +## See the file COPYING for copying conditions. 
+ +[Unit] +Description=docker-entrypoint.service + +[Service] +ExecStartPre=/bin/bash -e -x -c "cat -- /etc/docker-entrypoint-cmd" +ExecStart=/bin/bash -e -x -c /etc/docker-entrypoint-cmd +ExecStopPost=/usr/bin/docker-entrypoint-stop.sh +StandardInput=tty-force +StandardOutput=inherit +StandardError=inherit +EnvironmentFile=/etc/docker-entrypoint-env + +[Install] +WantedBy=multi-user.target diff --git a/linux/build/docker/systemd-entrypoint/docker-entrypoint.target b/linux/build/docker/systemd-entrypoint/docker-entrypoint.target new file mode 100644 index 0000000..ee21afe --- /dev/null +++ b/linux/build/docker/systemd-entrypoint/docker-entrypoint.target @@ -0,0 +1,6 @@ +## Copyright (C) 2025 - 2025 ENCRYPTED SUPPORT LLC +## See the file COPYING for copying conditions. + +[Unit] +Description=the target for docker-entrypoint.service +Requires=docker-entrypoint.service systemd-logind.service systemd-user-sessions.service diff --git a/linux/build/docker/systemd-entrypoint/entrypoint.sh b/linux/build/docker/systemd-entrypoint/entrypoint.sh new file mode 100644 index 0000000..a3c64e8 --- /dev/null +++ b/linux/build/docker/systemd-entrypoint/entrypoint.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +## Copyright (C) 2025 - 2025 ENCRYPTED SUPPORT LLC +## See the file COPYING for copying conditions. + +## TODO: document + +set -x +set -o errexit +set -o nounset +set -o errtrace +set -o pipefail + +container=docker +export container + +if [ $# -eq 0 ]; then + printf '%s\n' 'ERROR: No command specified. You probably want to run "journalctl -f", or maybe "bash"?' >&2 + exit 1 +fi + +if [ ! -t 0 ]; then + printf '%s\n' 'ERROR: TTY needs to be enabled ("docker run -t ...").' >&2 + exit 1 +fi + +env | tee -- /etc/docker-entrypoint-env >/dev/null + +## Debugging. 
+cat -- /etc/docker-entrypoint-env + +quoted_args="$(printf " %q" "${@}")" +printf '%s\n' "${quoted_args}" | tee -- /etc/docker-entrypoint-cmd >/dev/null +chmod +x -- /etc/docker-entrypoint-cmd + +systemctl mask systemd-firstboot.service systemd-udevd.service systemd-modules-load.service +systemctl unmask systemd-logind +systemctl enable docker-entrypoint.service + +systemd= +if [ -x /lib/systemd/systemd ]; then + systemd=/lib/systemd/systemd +elif [ -x /usr/lib/systemd/systemd ]; then + systemd=/usr/lib/systemd/systemd +elif [ -x /sbin/init ]; then + systemd=/sbin/init +else + printf '%s\n' 'ERROR: systemd is not installed' >&2 + exit 1 +fi + +declare -a systemd_args=( + --show-status=false + --unit=docker-entrypoint.target +) + +printf '%s\n' "$0: starting $systemd ${systemd_args[*]}" + +exec "$systemd" "${systemd_args[@]}" diff --git a/linux/build/scripts/build-inner.sh b/linux/build/scripts/build-inner.sh index 04726d1..b47fb20 100755 --- a/linux/build/scripts/build-inner.sh +++ b/linux/build/scripts/build-inner.sh @@ -2,13 +2,16 @@ # SilverMetal Linux — inner build step. # # Runs *inside* the silvermetal-builder container, as the unprivileged -# `builder` user. build.sh sets up the container, chowns the workspace, -# and runuser's into here. derivative-maker takes it from there and uses -# sudo internally for its privileged operations. +# `user` (uid 1000). build.sh's docker-run cmd chowns the workspace and +# sudoes here. The container's PID 1 is systemd (upstream's +# systemd-in-container pattern), so any `systemctl` calls derivative- +# maker makes — to start approx, daemon-reload, etc. — actually do +# what they're supposed to. derivative-maker uses sudo internally for +# its privileged ops. # # Why this is its own file: # The previous incarnation lived as a heredoc inside build.sh's docker -# run command. Once we needed to drop privileges from root to builder, +# run command. 
Once we needed to drop privileges from root to user, # the nested-heredoc / nested-quoting situation became unreadable; a # plain script with normal quoting is far easier to maintain. # diff --git a/linux/build/scripts/build.sh b/linux/build/scripts/build.sh index 22cf8b5..74ee2f5 100755 --- a/linux/build/scripts/build.sh +++ b/linux/build/scripts/build.sh @@ -32,7 +32,7 @@ cd "${REPO_ROOT}" # outside the LAN — it's the entry that fleet-wide /etc/docker/daemon.json # registers as an insecure-registry. The host-style "docker-registry:5000" # is *not* DNS-resolvable; do not use it. -BUILDER_IMAGE="${BUILDER_IMAGE:-docker-registry.silverlabs.uk/silvermetal-builder@sha256:70f160ab6084c49b81262e3625425848eb678c4b13175fb1b201cfb1fa075460}" +BUILDER_IMAGE="${BUILDER_IMAGE:-docker-registry.silverlabs.uk/silvermetal-builder@sha256:dc9dd29df4bee54807aee5bb2605b400754cba86db5343b4947a81a7ecea8811}" if [[ "${BUILDER_IMAGE}" != *"@sha256:"* ]]; then echo "build.sh: BUILDER_IMAGE must be pinned by digest, got: ${BUILDER_IMAGE}" >&2 @@ -105,11 +105,36 @@ else fi # --- Run the build inside the container ------------------------------------ -# --privileged is required because live-build mounts loop devices and chroots. -# --network=host lets the container reach snapshot.debian.org without us -# fighting CI proxy config; tighten if/when that becomes a concern. +# This is a systemd-in-container build host. Upstream Kicksecure's +# derivative-maker assumes a real systemd-managed Debian — its build steps +# call `systemctl restart approx-derivative-maker.socket`, +# `systemctl daemon-reload`, etc. and depend on those services *actually* +# running. Without systemd as PID 1 we'd be playing whack-a-mole with +# every service derivative-maker starts. 
+# +# Required runtime flags for systemd-in-container: +# --privileged live-build needs loop devices + chroot mounts +# --cgroupns=host systemd needs to manage cgroups; with its own +# namespace it can't see the host hierarchy +# --tmpfs /run, /run/lock systemd writes runtime state here +# -v /sys/fs/cgroup:rw the cgroup tree systemd manages +# -t entrypoint.sh requires a TTY (it `exit 1`s when +# stdin is not a tty); allocating one keeps that +# path happy in CI too where stdin is otherwise +# /dev/null +# +# `tail -f /dev/null` is NOT used — control flow goes through systemd: +# entrypoint.sh writes the user command to /etc/docker-entrypoint-cmd, +# execs systemd, systemd boots docker-entrypoint.service which runs the +# command, and docker-entrypoint-stop.sh propagates exit code via +# `systemctl exit <exit-code>` so the container exits with the right status. docker run --rm --privileged \ + --cgroupns=host \ + --tmpfs /run \ + --tmpfs /run/lock \ + -v /sys/fs/cgroup:/sys/fs/cgroup:rw \ --network=host \ + -t \ "${BIND_ARGS[@]}" \ -e SOURCE_DATE_EPOCH \ -e SNAPSHOT_TIMESTAMP \ @@ -118,17 +143,20 @@ docker run --rm --privileged \ -e TZ=UTC \ -e REPO_ROOT="${REPO_ROOT}" \ -e BUILD_DIR="${BUILD_DIR}" \ - -w "${REPO_ROOT}" \ "${BUILDER_IMAGE}" \ - bash -euo pipefail -c ' - # derivative-maker refuses to run as root (it uses sudo internally - # for the privileged ops). Hand the workspace ownership to the - # unprivileged builder user (uid 1000, created in the Dockerfile - # with passwordless sudo), then drop privs and let build-inner.sh - # do the actual work. - chown -R builder:builder "${REPO_ROOT}" "${BUILD_DIR}" - runuser -u builder -- "${REPO_ROOT}/linux/build/scripts/build-inner.sh" - ' || { echo "build.sh: derivative-maker failed"; exit 3; } + bash -c ' + # docker-entrypoint.service runs this as root via systemd, with + # the env vars captured by entrypoint.sh into + # /etc/docker-entrypoint-env.
We hand workspace ownership to the + # unprivileged user (uid 1000), then sudo into it for the + # derivative-maker invocation. derivative-maker uses sudo + # internally for the bits that need root. + set -e + chown -R 1000:1000 "${REPO_ROOT}" "${BUILD_DIR}" + exec sudo --non-interactive --preserve-env -u user -- \ + "${REPO_ROOT}/linux/build/scripts/build-inner.sh" + ' \ + || { echo "build.sh: derivative-maker failed"; exit 3; } # --- Hash artefacts --------------------------------------------------------- # Run hashing on the host (not in the container) so a busted container image