import { Readability } from '@mozilla/readability';
import { parse as parseHtml } from 'node-html-parser';
import { strArg } from './base.ts';
import type { Tool } from './base.ts';

/** Deadline for one HTTP request, including reading the response body. */
const FETCH_TIMEOUT_MS = 30_000;
/** Maximum number of characters of page content returned to the caller. */
const MAX_CONTENT_CHARS = 50_000;
/** Number of search results requested when the caller gives no usable count. */
const DEFAULT_SEARCH_COUNT = 10;
/** Upper bound on the number of search results requested. */
const MAX_SEARCH_COUNT = 20;

// ---------------------------------------------------------------------------
// web_search (Brave Search API)
// ---------------------------------------------------------------------------

/**
 * Tool that queries the Brave Search API and formats the hits as a numbered
 * plain-text list (title, URL, snippet).
 *
 * Failures are reported as `Error: ...` strings rather than thrown.
 */
export class WebSearchTool implements Tool {
  readonly name = 'web_search';
  readonly description =
    'Search the web using Brave Search. Returns a list of results with titles, URLs, and snippets.';
  readonly parameters = {
    query: { type: 'string', description: 'Search query.' },
    count: { type: 'number', description: 'Number of results (default 10, max 20).' },
  };
  readonly required = ['query'];

  private _apiKey: string | undefined;
  // NOTE(review): the proxy option is stored but nothing in this file applies
  // it to outgoing requests.
  private _proxy: string | undefined;

  constructor(opts: { apiKey?: string; proxy?: string } = {}) {
    this._apiKey = opts.apiKey;
    this._proxy = opts.proxy;
  }

  /**
   * Run a search.
   *
   * @param args - Tool arguments: `query` (required) and optional `count`.
   * @returns The formatted result list, `'No results found.'`, or an
   *   `Error: ...` message.
   */
  async execute(args: Record<string, unknown>): Promise<string> {
    const query = strArg(args, 'query').trim();
    if (!query) return 'Error: query is required.';
    if (!this._apiKey)
      return 'Error: BRAVE_API_KEY not configured (set tools.web.braveApiKey in config).';

    const count = clampSearchCount(args['count']);
    const url = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(query)}&count=${count}`;

    try {
      const res = await fetchWithTimeout(url, {
        headers: {
          Accept: 'application/json',
          'Accept-Encoding': 'gzip',
          'X-Subscription-Token': this._apiKey,
        },
      });
      if (!res.ok) return `Error: Brave Search API returned ${res.status}: ${await res.text()}`;

      const data = (await res.json()) as {
        web?: { results?: Array<{ title: string; url: string; description?: string }> };
      };
      const results = data.web?.results ?? [];
      if (results.length === 0) return 'No results found.';

      return results
        .map((r, i) => `${i + 1}. ${r.title}\n ${r.url}\n ${r.description ?? ''}`)
        .join('\n\n');
    } catch (err) {
      return `Error: ${String(err)}`;
    }
  }
}

// ---------------------------------------------------------------------------
// web_fetch
// ---------------------------------------------------------------------------

/**
 * Tool that downloads a URL and returns its content as text.
 *
 * HTML is run through Readability to extract the main article text; anything
 * else (or `mode: 'raw'`) is returned verbatim. Output is capped at
 * `MAX_CONTENT_CHARS` characters. Failures are reported as error strings
 * rather than thrown.
 */
export class WebFetchTool implements Tool {
  readonly name = 'web_fetch';
  readonly description =
    'Fetch a URL and return its content. HTML pages are extracted to readable text. Use mode="raw" for JSON/XML/plain text.';
  readonly parameters = {
    url: { type: 'string', description: 'URL to fetch.' },
    mode: {
      type: 'string',
      enum: ['markdown', 'text', 'raw'],
      description: 'Output mode (default: text).',
    },
  };
  readonly required = ['url'];

  // NOTE(review): the proxy option is stored but nothing in this file applies
  // it to outgoing requests.
  private _proxy: string | undefined;

  constructor(opts: { proxy?: string } = {}) {
    this._proxy = opts.proxy;
  }

  /**
   * Fetch a page.
   *
   * @param args - Tool arguments: `url` (required) and optional `mode`.
   *   Only `'raw'` changes behavior here; `'markdown'` and `'text'` take the
   *   same extraction path.
   * @returns The (possibly truncated) content, prefixed with `# <title>` when
   *   a title was extracted, or an error message.
   */
  async execute(args: Record<string, unknown>): Promise<string> {
    const url = strArg(args, 'url').trim();
    if (!url) return 'Error: url is required.';
    // The URL comes from tool input, so refuse anything that is not plain
    // web traffic (some runtimes let fetch() read file: or data: URLs).
    if (!isHttpUrl(url)) return `Error: only http(s) URLs are supported: ${url}`;

    const mode = strArg(args, 'mode', 'text');

    try {
      const res = await fetchWithTimeout(url, {
        headers: { 'User-Agent': 'Mozilla/5.0 (compatible; nanobot/1.0)' },
      });
      if (!res.ok) return `Error: HTTP ${res.status} from ${url}`;

      const contentType = res.headers.get('content-type') ?? '';
      const body = await res.text();

      // Treat the body as markup when the server says so, or when it starts
      // with '<' despite a missing/incorrect content type.
      const looksLikeMarkup =
        contentType.includes('text/html') || body.trimStart().startsWith('<');
      if (mode === 'raw' || !looksLikeMarkup) return truncate(body);

      const { title, text } = extractReadable(url, body);
      const content = truncate(text.replace(/\n{3,}/g, '\n\n').trim());
      return title ? `# ${title}\n\n${content}` : content;
    } catch (err) {
      return `Error fetching ${url}: ${String(err)}`;
    }
  }
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Normalize the `count` tool argument to an integer in
 * `[1, MAX_SEARCH_COUNT]`, using `DEFAULT_SEARCH_COUNT` when it is missing or
 * not a number.
 */
function clampSearchCount(raw: unknown): number {
  const requested = Number(raw ?? DEFAULT_SEARCH_COUNT);
  if (Number.isNaN(requested)) return DEFAULT_SEARCH_COUNT;
  return Math.min(Math.max(Math.trunc(requested), 1), MAX_SEARCH_COUNT);
}

/** True when `raw` parses as an absolute URL with an http: or https: scheme. */
function isHttpUrl(raw: string): boolean {
  try {
    const { protocol } = new URL(raw);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    return false;
  }
}

/** Cut `text` to at most `limit` characters, appending a marker when cut. */
function truncate(text: string, limit: number = MAX_CONTENT_CHARS): string {
  return text.length > limit ? `${text.slice(0, limit)}\n... (truncated)` : text;
}

/**
 * `fetch` with a deadline of `FETCH_TIMEOUT_MS`.
 *
 * The abort signal stays armed after the response headers arrive, so a stalled
 * body read (`res.text()` / `res.json()`) is aborted as well. Any `signal` in
 * `init` is replaced by the timeout signal.
 */
function fetchWithTimeout(url: string, init: RequestInit = {}): Promise<Response> {
  return fetch(url, { ...init, signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
}

/**
 * Crude HTML-to-text fallback: drops comments and the contents of
 * `<script>`/`<style>` elements, replaces remaining tags with spaces, and
 * collapses whitespace. Entities are not decoded.
 */
function stripTags(html: string): string {
  return html
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, ' ')
    .replace(/<[^>]*>/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Extract the readable title and text of an HTML page with Readability.
 *
 * Readability runs against a duck-typed document, so it is treated as
 * best-effort: if it throws, or yields no text, the result falls back to
 * `stripTags(html)`.
 */
function extractReadable(url: string, html: string): { title: string; text: string } {
  let title = '';
  try {
    const root = parseHtml(html);
    // biome-ignore lint/suspicious/noExplicitAny: Readability duck-typing requires any
    const doc = makePseudoDocument(url, html, root) as any;
    const article = new Readability(doc).parse();
    title = article?.title ?? '';
    const text = article?.textContent ?? '';
    if (text.trim() !== '') return { title, text };
  } catch {
    // Fall through to the tag-stripping fallback below.
  }
  return { title, text: stripTags(html) };
}

/**
 * Build a minimal pseudo-document that satisfies Readability's interface.
 *
 * @param url - Page URL, exposed as `baseURI`, `documentURI`, `URL` and
 *   `location.href`.
 * @param html - Original markup, exposed through the `innerHTML` getter.
 * @param root - Parsed tree from node-html-parser that backs the selectors.
 */
function makePseudoDocument(
  url: string,
  html: string,
  root: ReturnType<typeof parseHtml>,
): Record<string, unknown> {
  // node-html-parser's API is close enough for Readability's needs when
  // accessed via a wrapper. Bun/Node don't have DOMParser built-in, so we
  // duck-type what Readability needs: baseURI, documentURI, querySelector,
  // querySelectorAll, innerHTML.
  const pseudoDoc = {
    baseURI: url,
    documentURI: url,
    URL: url,
    title: root.querySelector('title')?.text ?? '',
    documentElement: root,
    body: root.querySelector('body') ?? root,
    head: root.querySelector('head') ?? root,
    // biome-ignore lint/suspicious/noExplicitAny: Readability duck-typing
    querySelector: (sel: string) => root.querySelector(sel) as any,
    // biome-ignore lint/suspicious/noExplicitAny: Readability duck-typing
    querySelectorAll: (sel: string) => root.querySelectorAll(sel) as any,
    getElementsByTagName: (tag: string) => root.querySelectorAll(tag),
    // The factory methods below return inert stubs, not real nodes.
    createElement: (_tag: string) => ({ innerHTML: '', textContent: '', style: {} }),
    createTreeWalker: () => ({ nextNode: () => null }),
    createRange: () => ({ selectNodeContents: () => {}, cloneContents: () => null }),
    get innerHTML() {
      return html;
    },
    location: { href: url },
  };
  return pseudoDoc;
}