From a30671c44cb26c7d5dddb58e6dbe7a4b26f82b40 Mon Sep 17 00:00:00 2001 From: SysAdmin Date: Tue, 17 Mar 2026 01:33:56 +0000 Subject: [PATCH] fix(schematics): handle PMC download countdown page and ZIP-wrapped schematics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The download pipeline was getting HTML instead of the binary file because PMC's /download/schematic/ returns a countdown confirmation page. Added downloadSchematic() that uses a full browser flow (project visit → download page → extract static URL → capture file). Also added ZIP extraction via adm-zip for schematics wrapped in ZIP archives. Co-Authored-By: Claude Opus 4.6 (1M context) --- package-lock.json | 10 ++++ package.json | 1 + src/schematics-browser.js | 103 ++++++++++++++++++++++++++++++++++++++ src/schematics.js | 73 +++++++++++++++++++++------ 4 files changed, 171 insertions(+), 16 deletions(-) diff --git a/package-lock.json b/package-lock.json index e5da633..d7878c0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "1.0.0", "dependencies": { "@modelcontextprotocol/sdk": "^1.27.1", + "adm-zip": "^0.5.16", "express": "^4.21.2", "minecraft-data": "^3.105.0", "playwright": "^1.58.2", @@ -382,6 +383,15 @@ "node": ">= 0.6" } }, + "node_modules/adm-zip": { + "version": "0.5.16", + "resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.16.tgz", + "integrity": "sha512-TGw5yVi4saajsSEgz25grObGHEUaDrniwvA2qwSC060KfqGPdglhvPMA2lPIoxs3PQIItj2iag35fONcQqgUaQ==", + "license": "MIT", + "engines": { + "node": ">=12.0" + } + }, "node_modules/ajv": { "version": "8.18.0", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.18.0.tgz", diff --git a/package.json b/package.json index affd998..4a4d4cf 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,7 @@ }, "dependencies": { "@modelcontextprotocol/sdk": "^1.27.1", + "adm-zip": "^0.5.16", "express": "^4.21.2", "minecraft-data": "^3.105.0", "playwright": "^1.58.2", diff --git 
a/src/schematics-browser.js b/src/schematics-browser.js index 174836a..e30d365 100644 --- a/src/schematics-browser.js +++ b/src/schematics-browser.js @@ -160,6 +160,109 @@ export async function downloadUrl(url, timeoutMs = 60000) { } } +/** + * Download a schematic from Planet Minecraft using the full browser flow. + * PMC's /download/schematic/ page shows a countdown confirmation, not the actual file. + * This function: + * 1. Visits the project page (sets cookies/session) + * 2. Navigates to /download/schematic/ in the same context + * 3. Extracts the static file URL from the <a download> element + * 4. Triggers the download via navigation and captures the file + * @param {string} projectUrl - Planet Minecraft project URL + * @param {number} [timeoutMs=60000] + * @returns {Promise<Buffer>} Downloaded schematic file contents + */ +export async function downloadSchematic(projectUrl, timeoutMs = 60000) { + const browser = await getBrowser(); + const context = await browser.newContext({ + userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + viewport: { width: 1920, height: 1080 }, + locale: 'en-US', + acceptDownloads: true, + }); + const page = await context.newPage(); + await page.addInitScript(() => { + Object.defineProperty(navigator, 'webdriver', { get: () => false }); + }); + try { + // Step 1: Visit project page to establish session/cookies + log(TAG, `Visiting project page: ${projectUrl}`); + await page.goto(projectUrl, { waitUntil: 'domcontentloaded', timeout: timeoutMs }); + await waitForCloudflare(page, timeoutMs); + + // Step 2: Navigate to download confirmation page + const downloadPageUrl = projectUrl.replace(/\/?$/, '/download/schematic/'); + log(TAG, `Navigating to download page: ${downloadPageUrl}`); + await page.goto(downloadPageUrl, { waitUntil: 'domcontentloaded', timeout: timeoutMs }); + await waitForCloudflare(page, timeoutMs); + + // Step 3: Try to extract the static file URL from <a download> element + 
let staticUrl = null; + try { + staticUrl = await page.getAttribute('a[download]', 'href', { timeout: 5000 }); + if (staticUrl) { + log(TAG, `Found static download URL: ${staticUrl}`); + } + } catch { + log(TAG, 'No element found, will try countdown fallback'); + } + + // Also try broader selectors if the first didn't work + if (!staticUrl) { + try { + staticUrl = await page.getAttribute('a[href*="static.planetminecraft.com"]', 'href', { timeout: 3000 }); + if (staticUrl) { + log(TAG, `Found static PMC URL: ${staticUrl}`); + } + } catch { + // Will use fallback + } + } + + let downloadBuffer; + + if (staticUrl) { + // Step 4a: Download via navigating to the static URL + log(TAG, 'Downloading via static URL...'); + const [download] = await Promise.all([ + page.waitForEvent('download', { timeout: timeoutMs }), + page.evaluate((url) => { window.location.href = url; }, staticUrl), + ]); + const path = await download.path(); + if (!path) throw new Error('Download failed — no file path returned'); + const { readFileSync } = await import('node:fs'); + downloadBuffer = readFileSync(path); + } else { + // Step 4b: Fallback — wait for countdown and click download button + log(TAG, 'Waiting for countdown timer to complete...'); + // Wait for the download button/link to become active (countdown is typically 5s) + try { + await page.waitForSelector('a.download-action:not([disabled]), a[href*="static.planetminecraft.com"], .confirm-download a', { + timeout: 15000, + state: 'visible', + }); + } catch { + throw new Error('No schematic download available for this project. 
The project may not include a downloadable schematic file.'); + } + + const [download] = await Promise.all([ + page.waitForEvent('download', { timeout: timeoutMs }), + page.click('a.download-action, a[href*="static.planetminecraft.com"], .confirm-download a'), + ]); + const path = await download.path(); + if (!path) throw new Error('Download failed — no file path returned'); + const { readFileSync } = await import('node:fs'); + downloadBuffer = readFileSync(path); + } + + log(TAG, `Downloaded ${downloadBuffer.length} bytes`); + return downloadBuffer; + } finally { + await page.close(); + await context.close(); + } +} + /** * Close the browser instance. Call during shutdown. */ diff --git a/src/schematics.js b/src/schematics.js index 0aedacd..b871569 100644 --- a/src/schematics.js +++ b/src/schematics.js @@ -1,7 +1,8 @@ import { log, logError } from './utils.js'; import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js'; import * as cache from './schematics-cache.js'; -import { fetchPage, downloadUrl } from './schematics-browser.js'; +import { fetchPage, downloadSchematic } from './schematics-browser.js'; +import AdmZip from 'adm-zip'; const TAG = 'Schematics'; const BASE_URL = 'https://www.planetminecraft.com'; @@ -131,29 +132,21 @@ export async function fetchSchematic(url) { // Cache metadata cache.set('meta', id, { name, url }); - // Find schematic download URL - // Planet Minecraft uses: /project/slug/download/schematic/ - const downloadPath = url.replace(/\/?$/, '/download/schematic/'); - - log(TAG, `Downloading schematic file: ${downloadPath}`); + // Download schematic via Playwright browser flow (handles PMC countdown page) + log(TAG, `Downloading schematic for: ${url}`); try { - rawBuffer = await downloadUrl(downloadPath); + rawBuffer = await downloadSchematic(url); } catch (err) { - // Try alternate — look for any download link with "schematic" in it - const dlMatch = 
html.match(/href="([^"]*download[^"]*schematic[^"]*)"/i); - if (dlMatch) { - const altUrl = dlMatch[1].startsWith('http') ? dlMatch[1] : BASE_URL + dlMatch[1]; - log(TAG, `Trying alternate download URL: ${altUrl}`); - rawBuffer = await downloadUrl(altUrl); - } else { - throw new Error(`Failed to download schematic: ${err.message}`); - } + throw new Error(`Failed to download schematic: ${err.message}`); } if (!rawBuffer || rawBuffer.length === 0) { throw new Error('Downloaded schematic file is empty'); } + // Handle ZIP-wrapped schematics + rawBuffer = extractFromZipIfNeeded(rawBuffer); + cache.setBuffer('raw', id, rawBuffer); log(TAG, `Cached raw schematic: ${rawBuffer.length} bytes`); } @@ -165,6 +158,54 @@ export async function fetchSchematic(url) { return blueprint; } +/** + * If the buffer is a ZIP archive, extract the first schematic file from it. + * Passes through GZIP and raw schematic buffers unchanged. + * @param {Buffer} buffer + * @returns {Buffer} + */ +function extractFromZipIfNeeded(buffer) { + // Check magic bytes + const magic = buffer.slice(0, 4).toString('hex'); + + // GZIP (1f8b) — prismarine-schematic handles these natively + if (magic.startsWith('1f8b')) { + log(TAG, 'Detected GZIP format, passing through'); + return buffer; + } + + // ZIP (504b0304) + if (magic === '504b0304') { + log(TAG, 'Detected ZIP archive, extracting schematic...'); + const zip = new AdmZip(buffer); + const entries = zip.getEntries(); + const schematicExtensions = ['.schematic', '.schem', '.nbt', '.litematic']; + + // Find first matching schematic file + const entry = entries.find(e => + schematicExtensions.some(ext => e.entryName.toLowerCase().endsWith(ext)) + ); + + if (entry) { + log(TAG, `Extracted "${entry.entryName}" from ZIP (${entry.header.size} bytes)`); + return entry.getData(); + } + + // If no recognized extension, try the first non-directory entry + const firstFile = entries.find(e => !e.isDirectory); + if (firstFile) { + log(TAG, `No schematic extension 
found, using first file: "${firstFile.entryName}"`); + return firstFile.getData(); + } + + throw new Error('ZIP archive contains no extractable files'); + } + + // Not ZIP or GZIP — assume raw NBT schematic + log(TAG, `Non-ZIP/GZIP format (magic: ${magic}), passing through`); + return buffer; +} + /** * Parse a raw .schematic/.schem buffer into our blueprint format. * @param {Buffer} buffer