From 7f96f755c7710979776c213003bf5962de8bc2ef Mon Sep 17 00:00:00 2001 From: SysAdmin Date: Tue, 17 Mar 2026 21:56:23 +0000 Subject: [PATCH] feat: use Playwright for GrabCraft HTML pages and add direct schematic URL support GrabCraft loads voxel data via external JS scripts after page load, so plain fetch() misses the rendered content. Now uses Playwright (via schematics-browser) for HTML pages while keeping plain fetch for .js files. Also adds a 5s wait in fetchPage for async JS content and supports direct .schem/.schematic/.litematic/.nbt URLs without needing a project page. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/grabcraft.js | 30 ++++++++++++----------- src/schematics-browser.js | 1 + src/schematics.js | 50 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/src/grabcraft.js b/src/grabcraft.js index 183465c..7d08a57 100644 --- a/src/grabcraft.js +++ b/src/grabcraft.js @@ -1,5 +1,6 @@ import { log, logError } from './utils.js'; import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js'; +import { fetchPage as fetchPageBrowser } from './schematics-browser.js'; const TAG = 'GrabCraft'; @@ -462,21 +463,22 @@ function extractMaterials(html) { */ async function fetchPage(url) { const isJs = url.endsWith('.js'); - const response = await fetch(url, { - headers: { - 'User-Agent': USER_AGENT, - 'Accept': isJs - ? 
'application/javascript, */*;q=0.8' - : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - }, - }); - - if (!response.ok) { - throw new Error(`HTTP ${response.status}: ${response.statusText} for ${url}`); + if (isJs) { + // Plain fetch for JS files (no rendering needed) + const response = await fetch(url, { + headers: { + 'User-Agent': USER_AGENT, + 'Accept': 'application/javascript, */*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.5', + }, + }); + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText} for ${url}`); + } + return response.text(); } - - return response.text(); + // Playwright for HTML pages — waits for JS to render + return fetchPageBrowser(url); } /** diff --git a/src/schematics-browser.js b/src/schematics-browser.js index c48380c..be6db10 100644 --- a/src/schematics-browser.js +++ b/src/schematics-browser.js @@ -86,6 +86,7 @@ export async function fetchPage(url, timeoutMs = 30000) { try { await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs }); await waitForCloudflare(page, timeoutMs); + await page.waitForTimeout(5000); // Wait for async JS content (e.g. 
GrabCraft voxel data) return await page.content(); } finally { await page.close(); diff --git a/src/schematics.js b/src/schematics.js index 24d42f8..e92a17e 100644 --- a/src/schematics.js +++ b/src/schematics.js @@ -1,7 +1,7 @@ import { log, logError } from './utils.js'; import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js'; import * as cache from './schematics-cache.js'; -import { fetchPage, downloadSchematic } from './schematics-browser.js'; +import { fetchPage, downloadSchematic, downloadUrl } from './schematics-browser.js'; import AdmZip from 'adm-zip'; const TAG = 'Schematics'; @@ -98,6 +98,12 @@ export async function searchSchematics(query, page = 1) { * @returns {Promise} Blueprint-compatible object with voxels array */ export async function fetchSchematic(url) { + // Handle direct schematic file URLs (.schem, .schematic, .litematic, .nbt) + const directMatch = url.match(/\.(schem|schematic|litematic|nbt)(\?.*)?$/i); + if (directMatch) { + return fetchDirectSchematic(url); + } + // Extract a stable ID from URL (use slug as ID) const slugMatch = url.match(/\/project\/([^/]+)/); if (!slugMatch) throw new Error(`Invalid project URL: ${url}`); @@ -474,3 +480,45 @@ export function getSchematicCategories() { { name: 'Other', slug: 'other' }, ]; } + +/** + * Fetch a schematic directly from a URL (no project page needed). + * Supports direct links to .schem, .schematic, .litematic, .nbt files. 
+ * @param {string} url - Direct download URL + * @returns {Promise} Blueprint-compatible object + */ +async function fetchDirectSchematic(url) { + // Use filename as ID + const filename = url.split('/').pop().split('?')[0]; + const id = `direct-${cache.cacheKey(filename)}`; + + const parsedData = cache.get('parsed', id); + if (parsedData) { + log(TAG, `Direct schematic cache hit: ${filename}`); + return parsedData; + } + + let rawBuffer = cache.getBuffer('raw', id); + + if (!rawBuffer) { + log(TAG, `Downloading direct schematic: ${url}`); + try { + rawBuffer = await downloadUrl(url); + } catch (err) { + throw new Error(`Failed to download schematic from ${url}: ${err.message}`); + } + + if (!rawBuffer || rawBuffer.length === 0) { + throw new Error('Downloaded schematic file is empty'); + } + + rawBuffer = extractFromZipIfNeeded(rawBuffer); + cache.setBuffer('raw', id, rawBuffer); + log(TAG, `Cached raw direct schematic: ${rawBuffer.length} bytes`); + } + + cache.set('meta', id, { name: filename, url }); + const blueprint = await parseSchematicBuffer(rawBuffer, id, url); + cache.set('parsed', id, blueprint); + return blueprint; +}