fix(schematics): switch from minecraft-schematics.com to Planet Minecraft

minecraft-schematics.com sits behind a Cloudflare WAF that blocks POST requests to its AJAX search endpoint, which makes automated search impossible. Switched to Planet Minecraft, which serves server-rendered search results and exposes direct schematic download URLs.

Also added browser stealth settings (custom user-agent, hidden navigator.webdriver flag, AutomationControlled disabled) for all Playwright functions, plus Cloudflare challenge detection in fetchPage and downloadFile (downloadUrl does not wait for the challenge).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 01:04:04 +00:00
parent a6fa95e3f3
commit a739e44ecc
3 changed files with 122 additions and 67 deletions
--- a/src/mcp-server.js
+++ b/src/mcp-server.js
@@ -761,13 +761,13 @@ export function startMcpServer(bedrock, port = 3002) {
      }
    );

-    // ── Tool: minecraft_search_schematics (minecraft-schematics.com) ──
+    // ── Tool: minecraft_search_schematics (Planet Minecraft) ──
    server.registerTool(
      'minecraft_search_schematics',
      {
        title: 'Search Minecraft Schematics',
        description:
-          'Search minecraft-schematics.com for downloadable schematics (20,000+ library). Returns names, URLs, and IDs. Use the URL with minecraft_build_schematic to construct the building. Requires Playwright browser automation.',
+          'Search Planet Minecraft for downloadable schematics. Returns names, URLs, authors, and download counts. Use the URL with minecraft_build_schematic to construct the building. Requires Playwright browser automation.',
        inputSchema: z.object({
          query: z.string().describe('Search query, e.g. "castle", "medieval house", "modern city"'),
          page: z.number().int().min(1).optional().describe('Page number (default 1)'),
@@ -809,15 +809,15 @@ export function startMcpServer(bedrock, port = 3002) {
      }
    );

-    // ── Tool: minecraft_build_schematic (minecraft-schematics.com) ──
+    // ── Tool: minecraft_build_schematic (Planet Minecraft) ──
    server.registerTool(
      'minecraft_build_schematic',
      {
        title: 'Build Schematic',
        description:
-          'Download a schematic from minecraft-schematics.com, parse it, and build it in Minecraft. If no coordinates given, builds at the player\'s current position. Use dryRun to preview materials and dimensions without building. Requires Playwright browser automation.',
+          'Download a schematic from Planet Minecraft, parse it, and build it in Minecraft. If no coordinates given, builds at the player\'s current position. Use dryRun to preview materials and dimensions without building. Requires Playwright browser automation.',
        inputSchema: z.object({
-          url: z.string().describe('Schematic URL from minecraft-schematics.com'),
+          url: z.string().describe('Project URL from Planet Minecraft'),
          x: z.number().int().optional().describe('Build origin X (default: player position)'),
          y: z.number().int().optional().describe('Build origin Y (default: player position)'),
          z: z.number().int().optional().describe('Build origin Z (default: player position)'),
--- a/src/schematics-browser.js
+++ b/src/schematics-browser.js
@@ -26,7 +26,11 @@ async function getBrowser() {
    log(TAG, 'Launching headless Chromium...');
    browserInstance = await pw.chromium.launch({
      headless: true,
-      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+      args: [
+        '--no-sandbox',
+        '--disable-setuid-sandbox',
+        '--disable-blink-features=AutomationControlled',
+      ],
    });

    browserInstance.on('disconnected', () => {
@@ -42,20 +46,50 @@ async function getBrowser() {
  return browserPromise;
 }

+/**
+ * Wait until the page title no longer contains 'Just a moment'; returns at once if it never did.
+ * Re-reads the title every 500 ms and throws an Error once timeoutMs elapses with the marker still there.
+ * @param {import('playwright').Page} page - page whose title is polled for the challenge marker
+ * @param {number} timeoutMs - polling budget in milliseconds, measured from entry to this function
+ */
+async function waitForCloudflare(page, timeoutMs) {
+  const start = Date.now();
+  const pollInterval = 500; // pause between title checks, in milliseconds
+  while (Date.now() - start < timeoutMs) {
+    const title = await page.title(); // NOTE(review): may reject if the page navigates mid-poll — confirm
+    if (!title.includes('Just a moment')) return; // marker absent: no challenge, or it has cleared
+    log(TAG, 'Waiting for Cloudflare challenge...');
+    await page.waitForTimeout(pollInterval);
+  }
+  throw new Error('Timed out waiting for Cloudflare challenge to resolve'); // budget spent, marker still present
+}
+
 /**
 * Fetch a page's HTML content via Playwright.
+ * Handles Cloudflare challenge pages by waiting for them to resolve.
 * @param {string} url
 * @param {number} [timeoutMs=30000]
 * @returns {Promise<string>} HTML content
 */
 export async function fetchPage(url, timeoutMs = 30000) {
  const browser = await getBrowser();
-  const page = await browser.newPage();
+  const context = await browser.newContext({
+    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+    viewport: { width: 1920, height: 1080 },
+    locale: 'en-US',
+  });
+  const page = await context.newPage();
+  // Hide webdriver flag from navigator
+  await page.addInitScript(() => {
+    Object.defineProperty(navigator, 'webdriver', { get: () => false });
+  });
  try {
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
+    await waitForCloudflare(page, timeoutMs);
    return await page.content();
  } finally {
    await page.close();
+    await context.close();
  }
 }

@@ -68,9 +102,18 @@ export async function fetchPage(url, timeoutMs = 30000) {
 */
 export async function downloadFile(pageUrl, selector, timeoutMs = 60000) {
  const browser = await getBrowser();
-  const page = await browser.newPage();
+  const context = await browser.newContext({
+    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+    viewport: { width: 1920, height: 1080 },
+    locale: 'en-US',
+  });
+  const page = await context.newPage();
+  await page.addInitScript(() => {
+    Object.defineProperty(navigator, 'webdriver', { get: () => false });
+  });
  try {
    await page.goto(pageUrl, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
+    await waitForCloudflare(page, timeoutMs);

    const [download] = await Promise.all([
      page.waitForEvent('download', { timeout: timeoutMs }),
@@ -84,6 +127,7 @@ export async function downloadFile(pageUrl, selector, timeoutMs = 60000) {
    return readFileSync(path);
  } finally {
    await page.close();
+    await context.close();
  }
 }

@@ -95,8 +139,15 @@ export async function downloadFile(pageUrl, selector, timeoutMs = 60000) {
 */
 export async function downloadUrl(url, timeoutMs = 60000) {
  const browser = await getBrowser();
-  const context = browser.contexts()[0] || await browser.newContext();
+  const context = await browser.newContext({
+    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+    viewport: { width: 1920, height: 1080 },
+    locale: 'en-US',
+  });
  const page = await context.newPage();
+  await page.addInitScript(() => {
+    Object.defineProperty(navigator, 'webdriver', { get: () => false });
+  });
  try {
    const response = await page.goto(url, { waitUntil: 'commit', timeout: timeoutMs });
    if (!response || !response.ok()) {
@@ -105,6 +156,7 @@ export async function downloadUrl(url, timeoutMs = 60000) {
    return await response.body();
  } finally {
    await page.close();
+    await context.close();
  }
 }

--- a/src/schematics.js
+++ b/src/schematics.js
@@ -4,10 +4,11 @@ import * as cache from './schematics-cache.js';
 import { fetchPage, downloadUrl } from './schematics-browser.js';

 const TAG = 'Schematics';
-const BASE_URL = 'https://www.minecraft-schematics.com';
+const BASE_URL = 'https://www.planetminecraft.com';

 /**
- * Search minecraft-schematics.com for schematics.
+ * Search Planet Minecraft for downloadable schematics.
+ * Uses server-side rendered search results (no AJAX needed).
 * @param {string} query
 * @param {number} [page=1]
 * @returns {Promise<{ results: Array<{ id: string, name: string, url: string, author: string, category: string, downloads: string }>, total: number, page: number }>}
@@ -20,65 +21,67 @@ export async function searchSchematics(query, page = 1) {
    return cached;
  }

-  const searchUrl = `${BASE_URL}/search/?q=${encodeURIComponent(query)}&page=${page}`;
+  const searchUrl = `${BASE_URL}/projects/?keywords=${encodeURIComponent(query)}&share=schematic&order=order_popularity&p=${page}`;
  log(TAG, `Searching: ${searchUrl}`);

  let html;
  try {
    html = await fetchPage(searchUrl);
  } catch (err) {
-    throw new Error(`Failed to search minecraft-schematics.com: ${err.message}`);
+    throw new Error(`Failed to search Planet Minecraft: ${err.message}`);
  }

  const results = [];

-  // Parse search results from the HTML
-  // Results are typically in list items or cards with links to /schematic/{id}/
-  const itemRegex = /<a[^>]+href="(\/schematic\/(\d+)\/[^"]*)"[^>]*>([\s\S]*?)<\/a>/gi;
+  // Parse search results — Planet Minecraft uses <li class="resource"> items
+  // Each result has: <a href="/project/slug/" class="r-title">Name</a>
+  // And metadata like views, downloads, author
+  const itemRegex = /<li\s+class="resource[^"]*"[^>]*data-id="(\d+)">([\s\S]*?)<\/li>/gi;
  let match;
  while ((match = itemRegex.exec(html)) !== null) {
-    const url = BASE_URL + match[1];
-    const id = match[2];
-    const inner = match[3];
+    const id = match[1];
+    const block = match[2];

-    // Extract the name from inner content
-    const nameText = inner.replace(/<[^>]+>/g, '').trim();
-    if (!nameText || nameText.length < 2) continue;
-    // Skip navigation/pagination links
-    if (/^\d+$/.test(nameText) || nameText === 'Next' || nameText === 'Previous') continue;
+    // Extract project URL and name from r-title link
+    const titleMatch = block.match(/<a[^>]+href="(\/project\/[^"]+)"[^>]*class="[^"]*r-title[^"]*"[^>]*>([^<]+)<\/a>/i);
+    if (!titleMatch) continue;
+
+    const projectPath = titleMatch[1];
+    const name = titleMatch[2].trim()
+      .replace(/&amp;/g, '&').replace(/&lt;/g, '<').replace(/&gt;/g, '>')
+      .replace(/&quot;/g, '"').replace(/&#039;/g, "'");
+    if (!name || name.length < 2) continue;
+
+    // Extract author
+    let author = '';
+    const authorMatch = block.match(/class="[^"]*activity_name[^"]*"[^>]*>(?:<[^>]*>)*([^<]+)/i);
+    if (authorMatch) author = authorMatch[1].trim();
+
+    // Extract category/subject
+    let category = '';
+    const catMatch = block.match(/class="r-subject"[^>]*>\s*([^<]+)/i);
+    if (catMatch) category = catMatch[1].trim();
+
+    // Extract download count
+    let downloads = '';
+    const dlMatch = block.match(/title="downloads"[^>]*><\/i>\s*<span>([\d.]+k?)<\/span>/i);
+    if (dlMatch) downloads = dlMatch[1];

    // Avoid duplicate IDs
    if (results.some(r => r.id === id)) continue;

    results.push({
      id,
-      name: nameText.slice(0, 100),
-      url,
-      author: '',
-      category: '',
-      downloads: '',
+      name: name.slice(0, 100),
+      url: BASE_URL + projectPath,
+      author: author.slice(0, 50),
+      category: category.slice(0, 50),
+      downloads,
    });
  }

-  // Try to extract additional metadata from surrounding HTML
-  // Look for author, category, download count near each result
-  for (const result of results) {
-    const idPattern = new RegExp(`schematic/${result.id}/[\\s\\S]{0,2000}`, 'i');
-    const context = html.match(idPattern);
-    if (context) {
-      const ctx = context[0];
-      const authorMatch = ctx.match(/(?:by|author)[:\s]*([^<\n]+)/i);
-      if (authorMatch) result.author = authorMatch[1].trim().slice(0, 50);
-
-      const catMatch = ctx.match(/category[:\s]*([^<\n]+)/i);
-      if (catMatch) result.category = catMatch[1].trim().slice(0, 50);
-
-      const dlMatch = ctx.match(/([\d,]+)\s*download/i);
-      if (dlMatch) result.downloads = dlMatch[1];
-    }
-  }
-
-  const totalMatch = html.match(/([\d,]+)\s*(?:results?|schematics?)\s*found/i);
+  // Extract total count — "1 - 25 of 1,021"
+  const totalMatch = html.match(/of\s+([\d,]+)\s*<\/p>/i);
  const total = totalMatch ? parseInt(totalMatch[1].replace(/,/g, ''), 10) : results.length;

  const result = { results, total, page };
@@ -89,15 +92,15 @@ export async function searchSchematics(query, page = 1) {
 }

 /**
- * Fetch and parse a schematic from minecraft-schematics.com.
- * @param {string} url - URL like https://www.minecraft-schematics.com/schematic/24287/
+ * Fetch and parse a schematic from Planet Minecraft.
+ * @param {string} url - URL like https://www.planetminecraft.com/project/some-project/
 * @returns {Promise<object>} Blueprint-compatible object with voxels array
 */
 export async function fetchSchematic(url) {
-  // Extract ID from URL
-  const idMatch = url.match(/schematic\/(\d+)/);
-  if (!idMatch) throw new Error(`Invalid schematic URL: ${url}`);
-  const id = idMatch[1];
+  // Extract a stable ID from URL (use slug as ID)
+  const slugMatch = url.match(/\/project\/([^/]+)/);
+  if (!slugMatch) throw new Error(`Invalid project URL: ${url}`);
+  const id = slugMatch[1];

  // Check parsed cache
  const parsedData = cache.get('parsed', id);
@@ -110,35 +113,34 @@ export async function fetchSchematic(url) {
  let rawBuffer = cache.getBuffer('raw', id);

  if (!rawBuffer) {
-    // Fetch the schematic page to find the download link
-    log(TAG, `Fetching schematic page: ${url}`);
+    // Fetch the project page to get metadata and find the download link
+    log(TAG, `Fetching project page: ${url}`);
    let html;
    try {
      html = await fetchPage(url);
    } catch (err) {
-      throw new Error(`Failed to fetch schematic page: ${err.message}`);
+      throw new Error(`Failed to fetch project page: ${err.message}`);
    }

    // Extract metadata
    const titleMatch = html.match(/<title>([^<]+)<\/title>/i);
    const name = titleMatch
-      ? titleMatch[1].replace(/\s*[-|].*Minecraft Schematics.*/i, '').trim()
+      ? titleMatch[1].replace(/\s*[-|].*(?:Minecraft|Planet).*/i, '').trim()
      : `Schematic ${id}`;

    // Cache metadata
    cache.set('meta', id, { name, url });

-    // Find download URL — look for the download link/button
-    // minecraft-schematics.com uses /schematic/{id}/download/ or similar
-    const downloadUrlPath = `/schematic/${id}/download/`;
-    const fullDownloadUrl = BASE_URL + downloadUrlPath;
+    // Find schematic download URL
+    // Planet Minecraft uses: /project/slug/download/schematic/
+    const downloadPath = url.replace(/\/?$/, '/download/schematic/');

-    log(TAG, `Downloading schematic file: ${fullDownloadUrl}`);
+    log(TAG, `Downloading schematic file: ${downloadPath}`);
    try {
-      rawBuffer = await downloadUrl(fullDownloadUrl);
+      rawBuffer = await downloadUrl(downloadPath);
    } catch (err) {
-      // Try alternate download approach — look for direct link in page
-      const dlMatch = html.match(/href="([^"]*download[^"]*)"/i);
+      // Try alternate — look for any download link with "schematic" in it
+      const dlMatch = html.match(/href="([^"]*download[^"]*schematic[^"]*)"/i);
      if (dlMatch) {
        const altUrl = dlMatch[1].startsWith('http') ? dlMatch[1] : BASE_URL + dlMatch[1];
        log(TAG, `Trying alternate download URL: ${altUrl}`);
@@ -248,10 +250,11 @@ async function parseSchematicBuffer(buffer, id, url) {
 export { blueprintToCommands } from './grabcraft.js';

 /**
- * Get categories for minecraft-schematics.com.
+ * Get categories available on Planet Minecraft.
 */
 export function getSchematicCategories() {
  return [
+    { name: '3D Art', slug: '3d-art' },
    { name: 'Castle', slug: 'castle' },
    { name: 'Medieval', slug: 'medieval' },
    { name: 'House', slug: 'house' },