From a2bee4b5dc07d04f52d279434908d100ca3ec05a Mon Sep 17 00:00:00 2001
From: SysAdmin
Date: Thu, 7 May 2026 20:32:01 +0100
Subject: [PATCH] fix(linux/build): better squashfs extraction + dump TOC sample (M1.1 iter27)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Run #4274 made progress: identical ISO sizes, identical TOC, identical
first 8 KiB — divergence is fully in file payload bytes. But the
diagnostic stalled because extract_squashfs() couldn't find the rootfs:

    diagnose: could not extract squashfs from A
    diagnose: could not extract squashfs from B

Two reasons to address:

1. The named-path probes only checked /live/filesystem.squashfs,
   /casper/filesystem.squashfs and /filesystem.squashfs. Some
   live-build configs use /install/... or no canonical name at all.

2. The fallback that used `xorriso -find / -name '*.squashfs'` then
   piped to `xorriso -extract` didn't work because xorriso's -find
   output quotes paths, and -extract chokes on quotes.

This iteration:

* Adds /install/filesystem.squashfs and /boot/filesystem.squashfs to
  the named-path probes.

* Replaces the -find/-name/tail fallback with a generic "biggest file
  in the ISO" picker. In a live-build ISO the rootfs payload is
  reliably the largest file regardless of what it's called. Parses
  lsdl output (with awk, handling spaces in paths and stripping
  single-quote framing).

* On extraction failure, dumps the top 20 files by size to stderr so
  the workflow log shows what's actually in the ISO — answers "what
  should the named-path probe match" for the next iter.

* Always echoes the first 30 lines of toc-a.txt (and the line count)
  so we can sanity-check the ISO layout in every run.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
Review notes (dropped by `git am`, not part of the commit message):
  * Every -extract now runs with `-osirrox on`; the xorriso manual
    documents image-to-disk copies as disabled by default, and the
    2>/dev/null on those calls was hiding the resulting failure.
  * list_iso_files/biggest_file no longer re-join awk fields, so paths
    containing runs of spaces survive intact.
  * The loop variable in extract_squashfs is now local.
  * The post-image blob id on the index line predates these edits.

 linux/build/scripts/diagnose-divergence.sh | 110 ++++++++++++++++++---
 1 file changed, 95 insertions(+), 15 deletions(-)

diff --git a/linux/build/scripts/diagnose-divergence.sh b/linux/build/scripts/diagnose-divergence.sh
index 3b2fe6b..d98c78b 100755
--- a/linux/build/scripts/diagnose-divergence.sh
+++ b/linux/build/scripts/diagnose-divergence.sh
@@ -79,32 +79,112 @@ toc_for "${ISO_B}" "${REPORT_DIR}/toc-b.txt"
 diff -u "${REPORT_DIR}/toc-a.txt" "${REPORT_DIR}/toc-b.txt" \
     > "${REPORT_DIR}/toc-diff.txt" 2>/dev/null || true
 
+# Always echo a sample of the TOC so we can see ISO layout in the
+# workflow log even when the squashfs extraction works — useful for
+# noticing "oh there's a third-party blob in here we don't expect".
+echo "diagnose: first 30 lines of TOC (ISO A):"
+head -n 30 "${REPORT_DIR}/toc-a.txt" 2>/dev/null || true
+# 2>/dev/null comes before the input redirection so that a missing TOC
+# file is silenced too (redirections are applied left to right).
+echo "diagnose: TOC (ISO A) size=$(wc -l 2>/dev/null < "${REPORT_DIR}/toc-a.txt") lines"
+
 # --- 3. Extract & compare the squashfs filesystem listings ------------------
-# The outer ISO is mostly a thin wrapper around live/filesystem.squashfs;
-# size/content drift almost always lives there. Pull just that file out
-# of each ISO and list its contents.
+# The outer ISO is mostly a thin wrapper around the rootfs payload (a
+# squashfs in live-build's case), so size/content drift almost always
+# lives there. Pull just that file out of each ISO and list its contents.
+#
+# Run #4274 hit the case where the named-path probes (/live/...) all
+# missed and the `xorriso -find … | tail -n1` fallback path didn't
+# work either (xorriso quotes its -find output, which -extract chokes
+# on). This version is more aggressive: lists every file in the ISO
+# with its size, picks the genuinely largest, and strips xorriso's
+# quoting.
+# List every regular file in an ISO as "<size> <path>", one per line,
+# with xorriso's surrounding single quotes removed from the path.
+# Arguments: $1 - path to the ISO image
+# Outputs:   "<size> <path>" lines on stdout; nothing if xorriso fails
+list_iso_files() {
+  local iso="$1"
+  xorriso -indev "${iso}" -find / -exec lsdl -- 2>/dev/null \
+    | awk '
+      # lsdl prints ls -dl style rows: mode links uid gid size, three
+      # date fields, then the quoted path (layout assumed from the field
+      # index used below; check it against the installed xorriso).
+      NF >= 9 && $1 ~ /^-/ {
+        size = $5
+        # Strip the eight leading fields from a copy of the row instead of
+        # re-joining fields 9..NF, so runs of spaces in the path survive.
+        path = $0
+        for (i = 1; i <= 8; i++) sub(/^[ \t]*[^ \t]+[ \t]+/, "", path)
+        # strip surrounding single quotes if present
+        gsub(/^'\''|'\''$/, "", path)
+        print size " " path
+      }'
+}
+
+# Print the path of the largest regular file in an ISO.
+# Arguments: $1 - path to the ISO image
+# Outputs:   the path on stdout; nothing if the listing is empty
+biggest_file() {
+  # A single awk pass replaces sort | head. The size prefix is removed
+  # with sub() on the saved row rather than by assigning $1="", which
+  # would make awk rebuild the row and squeeze spaces in the path.
+  list_iso_files "$1" \
+    | awk '
+      best == "" || $1 + 0 > max { max = $1 + 0; best = $0 }
+      END {
+        if (best != "") {
+          sub(/^[0-9]+ /, "", best)
+          print best
+        }
+      }'
+}
+
+# Extract the rootfs payload of an ISO to a local file.
+# Arguments: $1 - ISO image, $2 - destination file
+# Returns:   0 if a non-empty file was extracted, 1 otherwise
 extract_squashfs() {
   local iso="$1" out="$2"
   if ! command -v xorriso >/dev/null 2>&1; then return 1; fi
-  # Try the canonical Debian/Kicksecure layout first.
-  for path in /live/filesystem.squashfs /casper/filesystem.squashfs /filesystem.squashfs; do
-    if xorriso -indev "${iso}" -extract "${path}" "${out}" 2>/dev/null; then
-      [[ -s "${out}" ]] && return 0
+
+  # Try canonical Debian/Kicksecure layout first.
+  # xorriso only copies from image to disk when -osirrox is enabled, so
+  # it is switched on explicitly ahead of every -extract.
+  local path  # loop variable; keep it out of the global scope
+  for path in /live/filesystem.squashfs /casper/filesystem.squashfs \
+              /filesystem.squashfs /install/filesystem.squashfs \
+              /boot/filesystem.squashfs ; do
+    if xorriso -osirrox on -indev "${iso}" -extract "${path}" "${out}" 2>/dev/null \
+       && [[ -s "${out}" ]]; then
+      echo "diagnose: extracted ${path} from $(basename "${iso}")" >&2
+      return 0
     fi
   done
-  # Fallback: take the largest .squashfs we can find.
+
+  # Fallback: take the largest file in the ISO, regardless of name.
+  # In a live-build ISO that's reliably the rootfs payload, even when
+  # it isn't called *.squashfs.
   local biggest
-  biggest=$(xorriso -indev "${iso}" -find / -name '*.squashfs' 2>/dev/null \
-             | tail -n1)
-  [[ -n "${biggest}" ]] || return 1
-  xorriso -indev "${iso}" -extract "${biggest}" "${out}" 2>/dev/null || return 1
-  [[ -s "${out}" ]]
+  biggest=$(biggest_file "${iso}")
+  if [[ -n "${biggest}" ]]; then
+    echo "diagnose: largest file in $(basename "${iso}") is ${biggest}; extracting" >&2
+    if xorriso -osirrox on -indev "${iso}" -extract "${biggest}" "${out}" 2>/dev/null \
+       && [[ -s "${out}" ]]; then
+      return 0
+    fi
+  fi
+  return 1
 }
 
 SQFS_A="${WORK_DIR}/a.squashfs"
 SQFS_B="${WORK_DIR}/b.squashfs"
-extract_squashfs "${ISO_A}" "${SQFS_A}" || echo "diagnose: could not extract squashfs from A" >&2
-extract_squashfs "${ISO_B}" "${SQFS_B}" || echo "diagnose: could not extract squashfs from B" >&2
+
+if ! extract_squashfs "${ISO_A}" "${SQFS_A}"; then
+  echo "diagnose: could not extract rootfs from A — top 20 ISO files by size:" >&2
+  list_iso_files "${ISO_A}" | sort -k1,1 -n -r | head -n20 >&2 || true
+fi
+if ! extract_squashfs "${ISO_B}" "${SQFS_B}"; then
+  echo "diagnose: could not extract rootfs from B" >&2
+fi
 
 if [[ -s "${SQFS_A}" && -s "${SQFS_B}" ]]; then
   SQFS_SIZE_A=$(stat -c%s "${SQFS_A}")