feat: use Playwright for GrabCraft HTML pages and add direct schematic URL support

GrabCraft loads voxel data via external JS scripts after page load, so a plain fetch() misses the rendered content. GrabCraft HTML pages are now fetched with Playwright (via schematics-browser), while .js files still use plain fetch. Also adds a 5s wait to fetchPage in schematics-browser so async JS content has time to load, and supports direct .schem/.schematic/.litematic/.nbt URLs without needing a project page.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 21:56:23 +00:00
parent df8b367e8c
commit 7f96f755c7
3 changed files with 66 additions and 15 deletions
--- a/src/grabcraft.js
+++ b/src/grabcraft.js
@@ -1,5 +1,6 @@
 import { log, logError } from './utils.js';
 import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js';
+import { fetchPage as fetchPageBrowser } from './schematics-browser.js';

 const TAG = 'GrabCraft';

@@ -462,21 +463,22 @@ function extractMaterials(html) {
 */
 async function fetchPage(url) {
  const isJs = url.endsWith('.js');
-  const response = await fetch(url, {
-    headers: {
-      'User-Agent': USER_AGENT,
-      'Accept': isJs
-        ? 'application/javascript, */*;q=0.8'
-        : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-      'Accept-Language': 'en-US,en;q=0.5',
-    },
-  });
-
-  if (!response.ok) {
-    throw new Error(`HTTP ${response.status}: ${response.statusText} for ${url}`);
+  if (isJs) {
+    // Script files need no rendering: a plain HTTP fetch returns their full text.
+    const response = await fetch(url, {
+      headers: {
+        'User-Agent': USER_AGENT,
+        'Accept': 'application/javascript, */*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.5',
+      },
+    });
+    if (!response.ok) {
+      throw new Error(`HTTP ${response.status}: ${response.statusText} for ${url}`);
+    }
+    return response.text();
  }
-
-  return response.text();
+  // HTML pages: load via the Playwright-backed fetchPage from schematics-browser.js so script-rendered content is included.
+  return fetchPageBrowser(url);
 }

 /**
--- a/src/schematics-browser.js
+++ b/src/schematics-browser.js
@@ -86,6 +86,7 @@ export async function fetchPage(url, timeoutMs = 30000) {
  try {
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
    await waitForCloudflare(page, timeoutMs);
+    await page.waitForTimeout(5000); // Wait for async JS content (e.g. GrabCraft voxel data)
    return await page.content();
  } finally {
    await page.close();
--- a/src/schematics.js
+++ b/src/schematics.js
@@ -1,7 +1,7 @@
 import { log, logError } from './utils.js';
 import { resolveBlock, formatBlock, getUnknownBlocks, clearUnknownBlocks } from './block-map.js';
 import * as cache from './schematics-cache.js';
-import { fetchPage, downloadSchematic } from './schematics-browser.js';
+import { fetchPage, downloadSchematic, downloadUrl } from './schematics-browser.js';
 import AdmZip from 'adm-zip';

 const TAG = 'Schematics';
@@ -98,6 +98,12 @@ export async function searchSchematics(query, page = 1) {
 * @returns {Promise<object>} Blueprint-compatible object with voxels array
 */
 export async function fetchSchematic(url) {
+  // Handle direct schematic file URLs (.schem, .schematic, .litematic, .nbt)
+  const directMatch = url.match(/\.(schem|schematic|litematic|nbt)(\?.*)?$/i);
+  if (directMatch) {
+    return fetchDirectSchematic(url);
+  }
+
  // Extract a stable ID from URL (use slug as ID)
  const slugMatch = url.match(/\/project\/([^/]+)/);
  if (!slugMatch) throw new Error(`Invalid project URL: ${url}`);
@@ -474,3 +480,45 @@ export function getSchematicCategories() {
    { name: 'Other', slug: 'other' },
  ];
 }
+
+/**
+ * Fetch a schematic directly from a URL (no project page needed).
+ * Supports direct links to .schem, .schematic, .litematic, .nbt files.
+ * @param {string} url - Direct download URL
+ * @returns {Promise<object>} Blueprint-compatible object
+ */
+async function fetchDirectSchematic(url) {
+  // Use filename as ID
+  const filename = url.split('/').pop().split('?')[0];
+  const id = `direct-${cache.cacheKey(filename)}`;
+
+  const parsedData = cache.get('parsed', id);
+  if (parsedData) {
+    log(TAG, `Direct schematic cache hit: ${filename}`);
+    return parsedData;
+  }
+
+  let rawBuffer = cache.getBuffer('raw', id);
+
+  if (!rawBuffer) {
+    log(TAG, `Downloading direct schematic: ${url}`);
+    try {
+      rawBuffer = await downloadUrl(url);
+    } catch (err) {
+      throw new Error(`Failed to download schematic from ${url}: ${err.message}`);
+    }
+
+    if (!rawBuffer || rawBuffer.length === 0) {
+      throw new Error('Downloaded schematic file is empty');
+    }
+
+    rawBuffer = extractFromZipIfNeeded(rawBuffer);
+    cache.setBuffer('raw', id, rawBuffer);
+    log(TAG, `Cached raw direct schematic: ${rawBuffer.length} bytes`);
+  }
+
+  cache.set('meta', id, { name: filename, url });
+  const blueprint = await parseSchematicBuffer(rawBuffer, id, url);
+  cache.set('parsed', id, blueprint);
+  return blueprint;
+}