feat: use Playwright for GrabCraft HTML pages and add direct schematic URL support
All checks were successful
Deploy to Docker / deploy (push) Successful in 1m26s
All checks were successful
Deploy to Docker / deploy (push) Successful in 1m26s
GrabCraft loads voxel data via external JS scripts after page load, so plain fetch() misses the rendered content. Now uses Playwright (via schematics-browser) for HTML pages while keeping plain fetch for .js files. Also adds a 5s wait in fetchPage for async JS content and supports direct .schem/.schematic/.litematic/.nbt URLs without needing a project page. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import { log, logError } from './utils.js';
|
||||
import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js';
|
||||
import { fetchPage as fetchPageBrowser } from './schematics-browser.js';
|
||||
|
||||
const TAG = 'GrabCraft';
|
||||
|
||||
@@ -462,21 +463,22 @@ function extractMaterials(html) {
|
||||
*/
|
||||
async function fetchPage(url) {
|
||||
const isJs = url.endsWith('.js');
|
||||
const response = await fetch(url, {
|
||||
headers: {
|
||||
'User-Agent': USER_AGENT,
|
||||
'Accept': isJs
|
||||
? 'application/javascript, */*;q=0.8'
|
||||
: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
},
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}: ${response.statusText} for ${url}`);
|
||||
if (isJs) {
|
||||
// Plain fetch for JS files (no rendering needed)
|
||||
const response = await fetch(url, {
|
||||
headers: {
|
||||
'User-Agent': USER_AGENT,
|
||||
'Accept': 'application/javascript, */*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
},
|
||||
});
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}: ${response.statusText} for ${url}`);
|
||||
}
|
||||
return response.text();
|
||||
}
|
||||
|
||||
return response.text();
|
||||
// Playwright for HTML pages — waits for JS to render
|
||||
return fetchPageBrowser(url);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -86,6 +86,7 @@ export async function fetchPage(url, timeoutMs = 30000) {
|
||||
try {
|
||||
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
|
||||
await waitForCloudflare(page, timeoutMs);
|
||||
await page.waitForTimeout(5000); // Wait for async JS content (e.g. GrabCraft voxel data)
|
||||
return await page.content();
|
||||
} finally {
|
||||
await page.close();
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { log, logError } from './utils.js';
|
||||
import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js';
|
||||
import * as cache from './schematics-cache.js';
|
||||
import { fetchPage, downloadSchematic } from './schematics-browser.js';
|
||||
import { fetchPage, downloadSchematic, downloadUrl } from './schematics-browser.js';
|
||||
import AdmZip from 'adm-zip';
|
||||
|
||||
const TAG = 'Schematics';
|
||||
@@ -98,6 +98,12 @@ export async function searchSchematics(query, page = 1) {
|
||||
* @returns {Promise<object>} Blueprint-compatible object with voxels array
|
||||
*/
|
||||
export async function fetchSchematic(url) {
|
||||
// Handle direct schematic file URLs (.schem, .schematic, .litematic, .nbt)
|
||||
const directMatch = url.match(/\.(schem|schematic|litematic|nbt)(\?.*)?$/i);
|
||||
if (directMatch) {
|
||||
return fetchDirectSchematic(url);
|
||||
}
|
||||
|
||||
// Extract a stable ID from URL (use slug as ID)
|
||||
const slugMatch = url.match(/\/project\/([^/]+)/);
|
||||
if (!slugMatch) throw new Error(`Invalid project URL: ${url}`);
|
||||
@@ -474,3 +480,45 @@ export function getSchematicCategories() {
|
||||
{ name: 'Other', slug: 'other' },
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Fetch a schematic directly from a URL (no project page needed).
 * Supports direct links to .schem, .schematic, .litematic, .nbt files.
 *
 * Lookup order: a cached parsed blueprint is returned as-is; otherwise a
 * cached raw buffer is reused; otherwise the file is downloaded, passed
 * through extractFromZipIfNeeded, and stored in the raw cache. The buffer is
 * then parsed and the result stored in the parsed cache.
 *
 * @param {string} url - Direct download URL
 * @returns {Promise<object>} Blueprint-compatible object
 * @throws {Error} If the download fails (the underlying error is attached as
 *   `cause`) or the downloaded file is empty.
 */
async function fetchDirectSchematic(url) {
  // Strip the query string BEFORE taking the last path segment. A query value
  // that contains '/' (e.g. "?ref=a/b") would otherwise be picked up as the
  // filename and produce the wrong cache ID and metadata name.
  const filename = url.split('?')[0].split('/').pop();

  // NOTE(review): the ID is derived from the filename only, so two different
  // URLs that share a filename map to the same cache entries — confirm this
  // is intended before relying on it for multiple hosts.
  const id = `direct-${cache.cacheKey(filename)}`;

  const parsedData = cache.get('parsed', id);
  if (parsedData) {
    log(TAG, `Direct schematic cache hit: ${filename}`);
    return parsedData;
  }

  let rawBuffer = cache.getBuffer('raw', id);

  if (!rawBuffer) {
    log(TAG, `Downloading direct schematic: ${url}`);
    try {
      rawBuffer = await downloadUrl(url);
    } catch (err) {
      // Keep the original error reachable for debugging instead of flattening
      // it to its message only.
      throw new Error(`Failed to download schematic from ${url}: ${err.message}`, { cause: err });
    }

    if (!rawBuffer || rawBuffer.length === 0) {
      throw new Error('Downloaded schematic file is empty');
    }

    rawBuffer = extractFromZipIfNeeded(rawBuffer);
    cache.setBuffer('raw', id, rawBuffer);
    log(TAG, `Cached raw direct schematic: ${rawBuffer.length} bytes`);
  }

  // Metadata is (re)written on every non-parsed-cache path, including when
  // the raw buffer came from the cache.
  cache.set('meta', id, { name: filename, url });
  const blueprint = await parseSchematicBuffer(rawBuffer, id, url);
  cache.set('parsed', id, blueprint);
  return blueprint;
}
|
||||
|
||||
Reference in New Issue
Block a user