#!/usr/bin/env bash
# SilverMetal Linux — reproducibility-failure diagnostic.
#
# Invoked by build-iso-linux.yaml's Compare SHA256 step when two builds
# disagree, but also safe to run by hand against any two ISOs:
#
#   ISO_A=/path/a.iso ISO_B=/path/b.iso linux/build/scripts/diagnose-divergence.sh
#
# Environment:
#   ISO_A       (required) path to the first ISO
#   ISO_B       (required) path to the second ISO
#   REPORT_DIR  (optional) where report files are written; defaults to a
#               timestamped _divergence-* directory under
#               linux/build/output in the repository.
#
# Designed to run inside silvermetal-builder where xorriso, squashfs-tools,
# and diffoscope-minimal are present.
#
# Strategy: staged analysis, cheap-to-expensive.
#   1. sha256 + sizes (always)
#   2. ISO TOC diff (xorriso): tells us which top-level files differ.
#      Cheap: lists files + sizes, no payload extraction.
#   3. squashfs file listing diff (unsquashfs -ll): tells us which
#      *inner* files differ. The outer ISO is mostly squashfs payload,
#      so this is usually the layer with all the signal.
#   4. Targeted diffoscope: run on the extracted squashfs payloads only
#      (not on the ISOs), with report-size and container-depth caps.
#      Avoids the OOM that's predictable when diffoscope recurses into
#      the whole 1 GB ISO at once (run #4273 hit this).
#
# Output goes to REPORT_DIR; build-iso-linux.yaml tails the salient
# bits into the workflow log directly because Gitea 1.25.2 doesn't
# expose upload-artifact@v3 payloads via its API.

set -uo pipefail
# NOT set -e — we want every diagnostic to attempt, even if earlier
# ones fail. Each step `|| true`s itself.

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd -- "${SCRIPT_DIR}/../../.." && pwd)"

: "${ISO_A:?ISO_A must point to the first ISO}"
: "${ISO_B:?ISO_B must point to the second ISO}"

if [[ ! -f "${ISO_A}" || ! -f "${ISO_B}" ]]; then
  echo "diagnose: one of the ISOs is missing (A=${ISO_A} B=${ISO_B})" >&2
  exit 1
fi

REPORT_DIR="${REPORT_DIR:-${REPO_ROOT}/linux/build/output/_divergence-$(date -u +%Y%m%dT%H%M%SZ)}"
mkdir -p "${REPORT_DIR}"

# Scratch space for the extracted squashfs payloads. If mktemp fails we
# must not carry on with an empty WORK_DIR (the scratch paths below would
# become /a.squashfs and /b.squashfs), so fall back to a directory inside
# REPORT_DIR and keep going — this script is best-effort by design.
if ! WORK_DIR="$(mktemp -d -t silvermetal-divergence.XXXXXX)" \
  || [[ -z "${WORK_DIR}" ]]; then
  WORK_DIR="${REPORT_DIR}/.scratch"
  echo "diagnose: mktemp failed, using ${WORK_DIR} as scratch dir" >&2
  mkdir -p "${WORK_DIR}" || true
fi
trap 'rm -rf "${WORK_DIR}"' EXIT

echo "diagnose: writing report to ${REPORT_DIR}"
echo "diagnose: scratch dir ${WORK_DIR}"

# --- 1. sha256 + sizes ------------------------------------------------------
sha256sum "${ISO_A}" "${ISO_B}" > "${REPORT_DIR}/sha256.txt" 2>&1 || true
ls -la "${ISO_A}" "${ISO_B}" > "${REPORT_DIR}/sizes.txt" 2>&1 || true
SIZE_A=$(stat -c%s "${ISO_A}" 2>/dev/null || echo 0)
SIZE_B=$(stat -c%s "${ISO_B}" 2>/dev/null || echo 0)
SIZE_DELTA=$(( SIZE_B - SIZE_A ))
echo "diagnose: sizes A=${SIZE_A} B=${SIZE_B} delta=${SIZE_DELTA}"

# --- 2. ISO TOC diff --------------------------------------------------------

#######################################
# Write a table of contents for an ISO image to a file.
# Prefers xorriso; falls back to isoinfo when xorriso is not installed.
# Arguments: $1 - path to the ISO image
#            $2 - path of the listing file to write
# Outputs:   listing goes to $2; a skip notice goes to stderr when no
#            tool is available
# Returns:   1 when neither xorriso nor isoinfo is available, else 0
#            (listing failures are deliberately swallowed)
#######################################
toc_for() {
  local iso="$1" out="$2"
  if command -v xorriso >/dev/null 2>&1; then
    # `-find / -exec lsdl --` gives a long listing of every node:
    #   mode | links | uid/gid | size | mtime | path
    # That covers timestamp, ownership and size diffs in one pass.
    xorriso -indev "${iso}" -find / -exec lsdl -- 2>/dev/null > "${out}" || true
  elif command -v isoinfo >/dev/null 2>&1; then
    isoinfo -R -l -i "${iso}" > "${out}" 2>/dev/null || true
  else
    echo "diagnose: no xorriso/isoinfo available, skipping TOC" >&2
    return 1
  fi
}

toc_for "${ISO_A}" "${REPORT_DIR}/toc-a.txt"
toc_for "${ISO_B}" "${REPORT_DIR}/toc-b.txt"
diff -u "${REPORT_DIR}/toc-a.txt" "${REPORT_DIR}/toc-b.txt" \
  > "${REPORT_DIR}/toc-diff.txt" 2>/dev/null || true

# Always echo a sample of the TOC so we can see ISO layout in the
# workflow log even when the squashfs extraction works — useful for
# noticing "oh there's a third-party blob in here we don't expect".
echo "diagnose: first 30 lines of TOC (ISO A):"
head -n 30 "${REPORT_DIR}/toc-a.txt" 2>/dev/null || true

# Count lines only when the listing exists: a failed `<` redirect is
# reported by the shell itself before any `2>/dev/null` on the command
# takes effect, which would spill an error into the workflow log.
TOC_A_LINES=0
if [[ -r "${REPORT_DIR}/toc-a.txt" ]]; then
  TOC_A_LINES=$(wc -l < "${REPORT_DIR}/toc-a.txt")
fi
echo "diagnose: TOC (ISO A) size=${TOC_A_LINES} lines"

# --- 3. Extract & compare the squashfs filesystem listings ------------------
# The outer ISO is mostly a thin wrapper around the rootfs payload (a
# squashfs in live-build's case), so size/content drift almost always
# lives there. Pull just that file out of each ISO and list its contents.
#
# Run #4274 hit the case where the named-path probes (/live/...) all
# missed and the `xorriso -find … | tail -n1` fallback path didn't
# work either (xorriso quotes its -find output, which -extract chokes
# on). This version is more aggressive: lists every file in the ISO
# with its size, picks the genuinely largest, and strips xorriso's
# quoting.

#######################################
# List the files of an ISO with their sizes.
# Only listing entries whose first field starts with "-" and that have
# at least 9 fields are emitted.
# NOTE(review): the path is read from field 9 onwards, i.e. three
# date/time tokens are expected between size and path (ls -l style).
# The in-program format comment shows only two — confirm against the
# xorriso version shipped in the builder image.
# Arguments: $1 - path to the ISO image
# Outputs:   "<size> <path>" per line on stdout, no quoting. Path words
#            are re-joined with single spaces.
#######################################
list_iso_files() {
  local iso="$1"
  xorriso -indev "${iso}" -find / -exec lsdl -- 2>/dev/null \
    | awk '
      # lsdl format: "mode links uid gid size YYYY-MM-DD HH:MM path"
      # path can have spaces; reconstruct from $9 onwards.
      NF >= 9 && $1 ~ /^-/ {
        size=$5
        path=$9
        for (i=10; i<=NF; i++) path=path " " $i
        # strip surrounding single quotes if present
        gsub(/^'\''|'\''$/, "", path)
        print size " " path
      }'
}

#######################################
# Print the path of the largest file in an ISO.
# Arguments: $1 - path to the ISO image
# Outputs:   the path on stdout; nothing when the listing is empty
#######################################
biggest_file() {
  list_iso_files "$1" \
    | sort -k1,1 -n -r \
    | head -n1 \
    | awk '{ $1=""; sub(/^ /,""); print }'
}

#######################################
# Extract the rootfs payload of an ISO to a local file.
# Probes a fixed list of well-known squashfs locations first, then
# falls back to the largest file in the image.
# Arguments: $1 - path to the ISO image
#            $2 - destination file for the extracted payload
# Outputs:   progress and, on failure, the tail of xorriso's stderr,
#            all on stderr
# Returns:   0 when an extraction succeeded and $2 is non-empty,
#            1 otherwise (including when xorriso is not installed).
#            A failed attempt may still leave data at $2; the main flow
#            re-checks the file with -s regardless of this status.
#######################################
extract_squashfs() {
  local iso="$1" out="$2"
  if ! command -v xorriso >/dev/null 2>&1; then
    return 1
  fi

  # Keep xorriso's stderr next to the output file instead of in a
  # separate mktemp file, so an interrupted run leaves nothing behind
  # outside the caller's scratch directory.
  local err_log="${out}.xorriso-stderr"
  local candidate biggest

  # Try canonical Debian/Kicksecure layout first.
  for candidate in /live/filesystem.squashfs /casper/filesystem.squashfs \
                   /filesystem.squashfs /install/filesystem.squashfs \
                   /boot/filesystem.squashfs; do
    if xorriso -osirrox on -indev "${iso}" -extract "${candidate}" "${out}" \
         2>"${err_log}" \
       && [[ -s "${out}" ]]; then
      echo "diagnose: extracted ${candidate} from $(basename "${iso}")" >&2
      rm -f "${err_log}"
      return 0
    fi
  done

  # Fallback: take the largest file in the ISO, regardless of name.
  # In a live-build ISO that's reliably the rootfs payload, even when
  # it isn't called *.squashfs.
  biggest=$(biggest_file "${iso}")
  if [[ -n "${biggest}" ]]; then
    echo "diagnose: largest file in $(basename "${iso}") is ${biggest}; extracting" >&2
    if xorriso -osirrox on -indev "${iso}" -extract "${biggest}" "${out}" \
         2>"${err_log}" \
       && [[ -s "${out}" ]]; then
      rm -f "${err_log}"
      return 0
    fi
  fi

  # If we got here, extraction failed; surface the error.
  echo "diagnose: xorriso -extract stderr (last 30 lines):" >&2
  tail -n 30 "${err_log}" >&2 || true
  rm -f "${err_log}"
  return 1
}

SQFS_A="${WORK_DIR}/a.squashfs"
SQFS_B="${WORK_DIR}/b.squashfs"

if ! extract_squashfs "${ISO_A}" "${SQFS_A}"; then
  echo "diagnose: could not extract rootfs from A — top 20 ISO files by size:" >&2
  list_iso_files "${ISO_A}" | sort -k1,1 -n -r | head -n20 >&2 || true
fi
if ! extract_squashfs "${ISO_B}" "${SQFS_B}"; then
  echo "diagnose: could not extract rootfs from B" >&2
fi

if [[ -s "${SQFS_A}" && -s "${SQFS_B}" ]]; then
  SQFS_SIZE_A=$(stat -c%s "${SQFS_A}")
  SQFS_SIZE_B=$(stat -c%s "${SQFS_B}")
  echo "diagnose: squashfs sizes A=${SQFS_SIZE_A} B=${SQFS_SIZE_B} delta=$(( SQFS_SIZE_B - SQFS_SIZE_A ))"
  sha256sum "${SQFS_A}" "${SQFS_B}" > "${REPORT_DIR}/squashfs-sha256.txt"

  if command -v unsquashfs >/dev/null 2>&1; then
    # -ll = long listing with permissions, owner, size, date, target.
    # Easiest format to diff for "which files have different sizes".
    unsquashfs -ll "${SQFS_A}" 2>/dev/null > "${REPORT_DIR}/sqfs-ls-a.txt" || true
    unsquashfs -ll "${SQFS_B}" 2>/dev/null > "${REPORT_DIR}/sqfs-ls-b.txt" || true
    diff -u "${REPORT_DIR}/sqfs-ls-a.txt" "${REPORT_DIR}/sqfs-ls-b.txt" \
      > "${REPORT_DIR}/sqfs-ls-diff.txt" 2>/dev/null || true
  fi

  # --- 4. Targeted diffoscope on the squashfs only --------------------
  # Comparing two ~1 GB squashfs files directly is still big, but it's
  # bounded — diffoscope won't recurse out into the boot sectors,
  # initrd, kernel, etc. Cap the report size aggressively, no html
  # (memory hog), and limit container recursion
  # (--max-container-depth 2). The whole run is bounded to 600 s by
  # timeout; its exit status and console output are discarded, only
  # the --text report is kept.
  if command -v diffoscope >/dev/null 2>&1; then
    echo "diagnose: running diffoscope on squashfs payload"
    timeout 600 diffoscope \
      --no-default-limits \
      --max-page-size 50000000 \
      --max-text-report-size 5000000 \
      --max-container-depth 2 \
      --text "${REPORT_DIR}/sqfs-diff.txt" \
      "${SQFS_A}" "${SQFS_B}" \
      >/dev/null 2>&1 || true
  fi
fi

# --- Fallback: cmp -l on first 8 KiB of the ISOs (catches header-level drift)
if command -v cmp >/dev/null 2>&1; then
  cmp -l -n 8192 "${ISO_A}" "${ISO_B}" > "${REPORT_DIR}/iso-header-cmp.txt" 2>&1 || true
fi

# --- Checklist --------------------------------------------------------------
{
  echo "## Likely-culprit checklist"
  echo ""
  echo "ISO size delta: ${SIZE_DELTA} bytes"
  # SQFS_SIZE_A/B are only assigned when both payloads were extracted,
  # so guard with the same condition that assigned them (set -u).
  if [[ -s "${SQFS_A}" && -s "${SQFS_B}" ]]; then
    echo "squashfs size delta: $(( SQFS_SIZE_B - SQFS_SIZE_A )) bytes"
  fi
  echo ""
  echo "Walk these in order — most failures fall into the first two."
  echo ""
  echo " [ ] SOURCE_DATE_EPOCH was identical in both builds (compare BUILD_INFO files)"
  echo " [ ] snapshot.debian.org timestamp matched (compare snapshot-pin.env files)"
  echo " [ ] Same builder image digest (compare BUILD_INFO files)"
  echo " [ ] mksquashfs reproducibility flags survived (-no-exports -no-xattrs -reproducible)"
  echo " [ ] No build-id randomisation in kernel/initrd (look for differing .note.gnu.build-id)"
  echo " [ ] No host hostname/username leakage (grep for the runner host name)"
  echo " [ ] No locale drift (LC_ALL=C.UTF-8 enforced in container)"
  echo " [ ] dpkg trigger/postinst ordering (look at INFO: triggered ... in build log)"
} > "${REPORT_DIR}/checklist.md"

echo "diagnose: done. Files in ${REPORT_DIR}:"
ls -la "${REPORT_DIR}" 2>/dev/null || true