feat: claude one-shot port from nanobot python codebase (v0.1.4.post4)

2026-03-13 08:58:43 -06:00
parent 37c66a1bbf
commit a857bf95cd
53 changed files with 5002 additions and 8 deletions
--- a/src/agent/tools/web.ts
+++ b/src/agent/tools/web.ts
@@ -0,0 +1,174 @@
+import { Readability } from '@mozilla/readability';
+import { parse as parseHtml } from 'node-html-parser';
+import { strArg } from './base.ts';
+import type { Tool } from './base.ts';
+
+const FETCH_TIMEOUT_MS = 30_000;
+const MAX_CONTENT_CHARS = 50_000;
+
+// ---------------------------------------------------------------------------
+// web_search  (Brave Search API)
+// ---------------------------------------------------------------------------
+
+export class WebSearchTool implements Tool {
+  readonly name = 'web_search';
+  readonly description = 'Search the web using Brave Search. Returns a list of results with titles, URLs, and snippets.';
+  readonly parameters = {
+    query: { type: 'string', description: 'Search query.' },
+    count: { type: 'number', description: 'Number of results (default 10, max 20).' },
+  };
+  readonly required = ['query'];
+
+  private _apiKey: string | undefined;
+  private _proxy: string | undefined;
+
+  constructor(opts: { apiKey?: string; proxy?: string } = {}) {
+    this._apiKey = opts.apiKey;
+    this._proxy = opts.proxy;
+  }
+
+  async execute(args: Record<string, unknown>): Promise<string> {
+    const query = strArg(args, 'query').trim();
+    if (!query) return 'Error: query is required.';
+    if (!this._apiKey) return 'Error: BRAVE_API_KEY not configured (set tools.web.braveApiKey in config).';
+
+    const count = Math.min(Number(args['count'] ?? 10), 20);
+    const url = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(query)}&count=${count}`;
+
+    try {
+      const res = await fetchWithTimeout(url, {
+        headers: {
+          'Accept': 'application/json',
+          'Accept-Encoding': 'gzip',
+          'X-Subscription-Token': this._apiKey,
+        },
+      });
+
+      if (!res.ok) return `Error: Brave Search API returned ${res.status}: ${await res.text()}`;
+
+      const data = (await res.json()) as { web?: { results?: Array<{ title: string; url: string; description: string }> } };
+      const results = data.web?.results ?? [];
+
+      if (results.length === 0) return 'No results found.';
+
+      return results
+        .map((r, i) => `${i + 1}. ${r.title}\n   ${r.url}\n   ${r.description ?? ''}`)
+        .join('\n\n');
+    } catch (err) {
+      return `Error: ${String(err)}`;
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// web_fetch
+// ---------------------------------------------------------------------------
+
+export class WebFetchTool implements Tool {
+  readonly name = 'web_fetch';
+  readonly description =
+    'Fetch a URL and return its content. HTML pages are extracted to readable text. Use mode="raw" for JSON/XML/plain text.';
+  readonly parameters = {
+    url: { type: 'string', description: 'URL to fetch.' },
+    mode: { type: 'string', enum: ['markdown', 'text', 'raw'], description: 'Output mode (default: text).' },
+  };
+  readonly required = ['url'];
+
+  private _proxy: string | undefined;
+
+  constructor(opts: { proxy?: string } = {}) {
+    this._proxy = opts.proxy;
+  }
+
+  async execute(args: Record<string, unknown>): Promise<string> {
+    const url = strArg(args, 'url').trim();
+    if (!url) return 'Error: url is required.';
+
+    const mode = strArg(args, 'mode', 'text');
+
+    try {
+      const res = await fetchWithTimeout(url, {
+        headers: { 'User-Agent': 'Mozilla/5.0 (compatible; nanobot/1.0)' },
+      });
+
+      if (!res.ok) return `Error: HTTP ${res.status} from ${url}`;
+
+      const contentType = res.headers.get('content-type') ?? '';
+      const body = await res.text();
+
+      if (mode === 'raw' || (!contentType.includes('text/html') && !body.trimStart().startsWith('<'))) {
+        const truncated = body.length > MAX_CONTENT_CHARS ? body.slice(0, MAX_CONTENT_CHARS) + '\n... (truncated)' : body;
+        return truncated;
+      }
+
+      // Parse HTML with Readability
+      // Readability needs a DOM — build one from node-html-parser
+      const root = parseHtml(body);
+
+      // Minimal JSDOM-compatible interface for Readability
+      // biome-ignore lint/suspicious/noExplicitAny: Readability duck-typing requires any
+      const doc = makePseudoDocument(url, body, root) as any;
+      const reader = new Readability(doc);
+      const article = reader.parse();
+
+      const title = article?.title ?? '';
+      const textContent = article?.textContent ?? stripTags(body);
+      const trimmed = textContent.replace(/\n{3,}/g, '\n\n').trim();
+      const truncated = trimmed.length > MAX_CONTENT_CHARS
+        ? trimmed.slice(0, MAX_CONTENT_CHARS) + '\n... (truncated)'
+        : trimmed;
+
+      return title ? `# ${title}\n\n${truncated}` : truncated;
+    } catch (err) {
+      return `Error fetching ${url}: ${String(err)}`;
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+function fetchWithTimeout(url: string, init: RequestInit = {}): Promise<Response> {
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
+  return fetch(url, { ...init, signal: controller.signal }).finally(() => clearTimeout(timer));
+}
+
+function stripTags(html: string): string {
+  return html.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
+}
+
+/** Build a minimal pseudo-document that satisfies Readability's interface. */
+function makePseudoDocument(
+  url: string,
+  html: string,
+  root: ReturnType<typeof parseHtml>,
+): Record<string, unknown> {
+  // node-html-parser's API is close enough for Readability's needs when
+  // accessed via a proxy. We create a real DOMParser-like wrapper.
+  // Bun/Node don't have DOMParser built-in, so we duck-type what Readability
+  // needs: baseURI, documentURI, querySelector, querySelectorAll, innerHTML.
+  const pseudoDoc = {
+    baseURI: url,
+    documentURI: url,
+    URL: url,
+    title: root.querySelector('title')?.text ?? '',
+    documentElement: root,
+    body: root.querySelector('body') ?? root,
+    head: root.querySelector('head') ?? root,
+    // biome-ignore lint/suspicious/noExplicitAny: Readability duck-typing
+    querySelector: (sel: string) => root.querySelector(sel) as any,
+    // biome-ignore lint/suspicious/noExplicitAny: Readability duck-typing
+    querySelectorAll: (sel: string) => root.querySelectorAll(sel) as any,
+    getElementsByTagName: (tag: string) => root.querySelectorAll(tag),
+    createElement: (_tag: string) => ({ innerHTML: '', textContent: '', style: {} }),
+    createTreeWalker: () => ({ nextNode: () => null }),
+    createRange: () => ({ selectNodeContents: () => {}, cloneContents: () => null }),
+    // biome-ignore lint/suspicious/noExplicitAny: Readability duck-typing
+    get innerHTML() { return html; },
+    location: { href: url },
+  };
+
+  return pseudoDoc;
+}