feat: use Playwright for GrabCraft HTML pages and add direct schematic URL support
All checks were successful
Deploy to Docker / deploy (push) Successful in 1m26s

GrabCraft loads voxel data via external JS scripts after page load, so plain
fetch() misses the rendered content. GrabCraft HTML pages are now fetched with
Playwright (via schematics-browser), while .js files still use plain fetch. Also
adds a 5s wait in fetchPage for async JS content and supports direct
.schem/.schematic/.litematic/.nbt URLs without needing a project page.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-17 21:56:23 +00:00
parent df8b367e8c
commit 7f96f755c7
3 changed files with 66 additions and 15 deletions

View File

@@ -1,5 +1,6 @@
import { log, logError } from './utils.js'; import { log, logError } from './utils.js';
import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js'; import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js';
import { fetchPage as fetchPageBrowser } from './schematics-browser.js';
const TAG = 'GrabCraft'; const TAG = 'GrabCraft';
@@ -462,21 +463,22 @@ function extractMaterials(html) {
*/ */
async function fetchPage(url) { async function fetchPage(url) {
const isJs = url.endsWith('.js'); const isJs = url.endsWith('.js');
const response = await fetch(url, { if (isJs) {
headers: { // Plain fetch for JS files (no rendering needed)
'User-Agent': USER_AGENT, const response = await fetch(url, {
'Accept': isJs headers: {
? 'application/javascript, */*;q=0.8' 'User-Agent': USER_AGENT,
: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept': 'application/javascript, */*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5', 'Accept-Language': 'en-US,en;q=0.5',
}, },
}); });
if (!response.ok) {
if (!response.ok) { throw new Error(`HTTP ${response.status}: ${response.statusText} for ${url}`);
throw new Error(`HTTP ${response.status}: ${response.statusText} for ${url}`); }
return response.text();
} }
// Playwright for HTML pages — waits for JS to render
return response.text(); return fetchPageBrowser(url);
} }
/** /**

View File

@@ -86,6 +86,7 @@ export async function fetchPage(url, timeoutMs = 30000) {
try { try {
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs }); await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
await waitForCloudflare(page, timeoutMs); await waitForCloudflare(page, timeoutMs);
await page.waitForTimeout(5000); // Wait for async JS content (e.g. GrabCraft voxel data)
return await page.content(); return await page.content();
} finally { } finally {
await page.close(); await page.close();

View File

@@ -1,7 +1,7 @@
import { log, logError } from './utils.js'; import { log, logError } from './utils.js';
import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js'; import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js';
import * as cache from './schematics-cache.js'; import * as cache from './schematics-cache.js';
import { fetchPage, downloadSchematic } from './schematics-browser.js'; import { fetchPage, downloadSchematic, downloadUrl } from './schematics-browser.js';
import AdmZip from 'adm-zip'; import AdmZip from 'adm-zip';
const TAG = 'Schematics'; const TAG = 'Schematics';
@@ -98,6 +98,12 @@ export async function searchSchematics(query, page = 1) {
* @returns {Promise<object>} Blueprint-compatible object with voxels array * @returns {Promise<object>} Blueprint-compatible object with voxels array
*/ */
export async function fetchSchematic(url) { export async function fetchSchematic(url) {
// Handle direct schematic file URLs (.schem, .schematic, .litematic, .nbt)
const directMatch = url.match(/\.(schem|schematic|litematic|nbt)(\?.*)?$/i);
if (directMatch) {
return fetchDirectSchematic(url);
}
// Extract a stable ID from URL (use slug as ID) // Extract a stable ID from URL (use slug as ID)
const slugMatch = url.match(/\/project\/([^/]+)/); const slugMatch = url.match(/\/project\/([^/]+)/);
if (!slugMatch) throw new Error(`Invalid project URL: ${url}`); if (!slugMatch) throw new Error(`Invalid project URL: ${url}`);
@@ -474,3 +480,45 @@ export function getSchematicCategories() {
{ name: 'Other', slug: 'other' }, { name: 'Other', slug: 'other' },
]; ];
} }
/**
 * Fetch a schematic directly from a URL (no project page needed).
 * Supports direct links to .schem, .schematic, .litematic, .nbt files.
 *
 * Lookup order: parsed cache, then raw-buffer cache, then a fresh download.
 * A freshly downloaded buffer is passed through extractFromZipIfNeeded before
 * it is cached, and the parsed blueprint is cached before it is returned.
 *
 * @param {string} url - Direct download URL
 * @returns {Promise<object>} Blueprint-compatible object
 * @throws {Error} If the download fails (the underlying error is attached as
 *   `cause`) or the downloaded file is empty.
 */
async function fetchDirectSchematic(url) {
  // Use filename as ID: last '/'-separated segment with any query string removed.
  // NOTE(review): the ID depends on the filename only, so two different URLs
  // ending in the same filename look like they would share cache entries —
  // confirm this is intended.
  const filename = url.split('/').pop().split('?')[0];
  const id = `direct-${cache.cacheKey(filename)}`;

  const parsedData = cache.get('parsed', id);
  if (parsedData) {
    log(TAG, `Direct schematic cache hit: ${filename}`);
    return parsedData;
  }

  let rawBuffer = cache.getBuffer('raw', id);
  if (!rawBuffer) {
    log(TAG, `Downloading direct schematic: ${url}`);
    try {
      rawBuffer = await downloadUrl(url);
    } catch (err) {
      // Keep the original error reachable (stack, type) instead of flattening
      // it into the message only.
      throw new Error(`Failed to download schematic from ${url}: ${err.message}`, { cause: err });
    }
    if (!rawBuffer || rawBuffer.length === 0) {
      throw new Error('Downloaded schematic file is empty');
    }
    rawBuffer = extractFromZipIfNeeded(rawBuffer);
    cache.setBuffer('raw', id, rawBuffer);
    log(TAG, `Cached raw direct schematic: ${rawBuffer.length} bytes`);
  }

  // Metadata is written on every non-parsed-cache path, including raw cache hits.
  cache.set('meta', id, { name: filename, url });
  const blueprint = await parseSchematicBuffer(rawBuffer, id, url);
  cache.set('parsed', id, blueprint);
  return blueprint;
}