feat: use Playwright for GrabCraft HTML pages and add direct schematic URL support
All checks were successful
Deploy to Docker / deploy (push) Successful in 1m26s
All checks were successful
Deploy to Docker / deploy (push) Successful in 1m26s
GrabCraft loads voxel data via external JS scripts after page load, so plain fetch() misses the rendered content. Now uses Playwright (via schematics-browser) for HTML pages while keeping plain fetch for .js files. Also adds a 5s wait in fetchPage for async JS content and supports direct .schem/.schematic/.litematic/.nbt URLs without needing a project page. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import { log, logError } from './utils.js';
|
import { log, logError } from './utils.js';
|
||||||
import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js';
|
import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js';
|
||||||
|
import { fetchPage as fetchPageBrowser } from './schematics-browser.js';
|
||||||
|
|
||||||
const TAG = 'GrabCraft';
|
const TAG = 'GrabCraft';
|
||||||
|
|
||||||
@@ -462,21 +463,22 @@ function extractMaterials(html) {
|
|||||||
*/
|
*/
|
||||||
async function fetchPage(url) {
  // Purpose: return the text of `url`. Script files are downloaded with a
  // plain fetch(); everything else is delegated to the browser-based
  // fetchPageBrowser() helper, because the HTML pages only contain the
  // needed data after their scripts have run.
  //
  // url     - absolute URL of the page or .js file to retrieve
  // returns - Promise<string> with the response body / rendered page content
  // throws  - Error("HTTP <status>: <statusText> for <url>") when the plain
  //           fetch answers with a non-2xx status

  // Look at the path only: a cache-busting query ("data.js?v=3") or a
  // fragment must not hide the extension, and a ".js" that merely appears at
  // the end of a query value must not turn an HTML page into a script fetch.
  const pathOnly = url.split(/[?#]/)[0];
  const isJs = pathOnly.endsWith('.js');

  if (!isJs) {
    // Playwright for HTML pages — waits for JS to render
    return fetchPageBrowser(url);
  }

  // Plain fetch for JS files (no rendering needed)
  const response = await fetch(url, {
    headers: {
      'User-Agent': USER_AGENT,
      'Accept': 'application/javascript, */*;q=0.8',
      'Accept-Language': 'en-US,en;q=0.5',
    },
  });

  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${response.statusText} for ${url}`);
  }

  return response.text();
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -86,6 +86,7 @@ export async function fetchPage(url, timeoutMs = 30000) {
|
|||||||
try {
|
try {
|
||||||
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
|
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
|
||||||
await waitForCloudflare(page, timeoutMs);
|
await waitForCloudflare(page, timeoutMs);
|
||||||
|
await page.waitForTimeout(5000); // Wait for async JS content (e.g. GrabCraft voxel data)
|
||||||
return await page.content();
|
return await page.content();
|
||||||
} finally {
|
} finally {
|
||||||
await page.close();
|
await page.close();
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import { log, logError } from './utils.js';
|
import { log, logError } from './utils.js';
|
||||||
import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js';
|
import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js';
|
||||||
import * as cache from './schematics-cache.js';
|
import * as cache from './schematics-cache.js';
|
||||||
import { fetchPage, downloadSchematic } from './schematics-browser.js';
|
import { fetchPage, downloadSchematic, downloadUrl } from './schematics-browser.js';
|
||||||
import AdmZip from 'adm-zip';
|
import AdmZip from 'adm-zip';
|
||||||
|
|
||||||
const TAG = 'Schematics';
|
const TAG = 'Schematics';
|
||||||
@@ -98,6 +98,12 @@ export async function searchSchematics(query, page = 1) {
|
|||||||
* @returns {Promise<object>} Blueprint-compatible object with voxels array
|
* @returns {Promise<object>} Blueprint-compatible object with voxels array
|
||||||
*/
|
*/
|
||||||
export async function fetchSchematic(url) {
|
export async function fetchSchematic(url) {
|
||||||
|
// Handle direct schematic file URLs (.schem, .schematic, .litematic, .nbt)
|
||||||
|
const directMatch = url.match(/\.(schem|schematic|litematic|nbt)(\?.*)?$/i);
|
||||||
|
if (directMatch) {
|
||||||
|
return fetchDirectSchematic(url);
|
||||||
|
}
|
||||||
|
|
||||||
// Extract a stable ID from URL (use slug as ID)
|
// Extract a stable ID from URL (use slug as ID)
|
||||||
const slugMatch = url.match(/\/project\/([^/]+)/);
|
const slugMatch = url.match(/\/project\/([^/]+)/);
|
||||||
if (!slugMatch) throw new Error(`Invalid project URL: ${url}`);
|
if (!slugMatch) throw new Error(`Invalid project URL: ${url}`);
|
||||||
@@ -474,3 +480,45 @@ export function getSchematicCategories() {
|
|||||||
{ name: 'Other', slug: 'other' },
|
{ name: 'Other', slug: 'other' },
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch a schematic directly from a URL (no project page needed).
 * Supports direct links to .schem, .schematic, .litematic, .nbt files.
 *
 * Lookup order: the 'parsed' cache entry is returned as-is when present;
 * otherwise the 'raw' cached buffer is reused, and only if that is missing
 * is the file downloaded. The downloaded buffer is passed through
 * extractFromZipIfNeeded() before being cached and parsed.
 *
 * @param {string} url - Direct download URL
 * @returns {Promise<object>} Blueprint-compatible object
 * @throws {Error} If the download fails or the downloaded file is empty
 */
async function fetchDirectSchematic(url) {
  // Use filename as ID
  // NOTE(review): the ID depends on the file name only, so two different URLs
  // that end in the same file name share cache entries — confirm this is
  // acceptable or include the host/path in the key.
  const filename = directSchematicFilename(url);
  const id = `direct-${cache.cacheKey(filename)}`;

  const parsedData = cache.get('parsed', id);
  if (parsedData) {
    log(TAG, `Direct schematic cache hit: ${filename}`);
    return parsedData;
  }

  let rawBuffer = cache.getBuffer('raw', id);

  if (!rawBuffer) {
    log(TAG, `Downloading direct schematic: ${url}`);
    try {
      rawBuffer = await downloadUrl(url);
    } catch (err) {
      // Keep the original error reachable for debugging via `cause`.
      throw new Error(`Failed to download schematic from ${url}: ${err.message}`, { cause: err });
    }

    if (!rawBuffer || rawBuffer.length === 0) {
      throw new Error('Downloaded schematic file is empty');
    }

    rawBuffer = extractFromZipIfNeeded(rawBuffer);
    cache.setBuffer('raw', id, rawBuffer);
    log(TAG, `Cached raw direct schematic: ${rawBuffer.length} bytes`);
  }

  cache.set('meta', id, { name: filename, url });
  const blueprint = await parseSchematicBuffer(rawBuffer, id, url);
  cache.set('parsed', id, blueprint);
  return blueprint;
}

/**
 * Return the last path segment of a URL, ignoring any query string or
 * fragment. The query/fragment is removed first so that a "/" inside it
 * (e.g. "?redirect=/a/b") cannot be mistaken for a path separator.
 *
 * @param {string} url - URL or path of the schematic file
 * @returns {string} File name portion (not percent-decoded)
 */
function directSchematicFilename(url) {
  const pathOnly = url.split(/[?#]/)[0];
  return pathOnly.split('/').pop();
}
|
||||||
|
|||||||
Reference in New Issue
Block a user