feat: claude one-shot port from nanobot python codebase (v0.1.4.post4)
This commit is contained in:
174
src/agent/tools/web.ts
Normal file
174
src/agent/tools/web.ts
Normal file
@@ -0,0 +1,174 @@
|
||||
import { Readability } from '@mozilla/readability';
|
||||
import { parse as parseHtml } from 'node-html-parser';
|
||||
import { strArg } from './base.ts';
|
||||
import type { Tool } from './base.ts';
|
||||
|
||||
const FETCH_TIMEOUT_MS = 30_000;
|
||||
const MAX_CONTENT_CHARS = 50_000;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// web_search (Brave Search API)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class WebSearchTool implements Tool {
|
||||
readonly name = 'web_search';
|
||||
readonly description = 'Search the web using Brave Search. Returns a list of results with titles, URLs, and snippets.';
|
||||
readonly parameters = {
|
||||
query: { type: 'string', description: 'Search query.' },
|
||||
count: { type: 'number', description: 'Number of results (default 10, max 20).' },
|
||||
};
|
||||
readonly required = ['query'];
|
||||
|
||||
private _apiKey: string | undefined;
|
||||
private _proxy: string | undefined;
|
||||
|
||||
constructor(opts: { apiKey?: string; proxy?: string } = {}) {
|
||||
this._apiKey = opts.apiKey;
|
||||
this._proxy = opts.proxy;
|
||||
}
|
||||
|
||||
async execute(args: Record<string, unknown>): Promise<string> {
|
||||
const query = strArg(args, 'query').trim();
|
||||
if (!query) return 'Error: query is required.';
|
||||
if (!this._apiKey) return 'Error: BRAVE_API_KEY not configured (set tools.web.braveApiKey in config).';
|
||||
|
||||
const count = Math.min(Number(args['count'] ?? 10), 20);
|
||||
const url = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(query)}&count=${count}`;
|
||||
|
||||
try {
|
||||
const res = await fetchWithTimeout(url, {
|
||||
headers: {
|
||||
'Accept': 'application/json',
|
||||
'Accept-Encoding': 'gzip',
|
||||
'X-Subscription-Token': this._apiKey,
|
||||
},
|
||||
});
|
||||
|
||||
if (!res.ok) return `Error: Brave Search API returned ${res.status}: ${await res.text()}`;
|
||||
|
||||
const data = (await res.json()) as { web?: { results?: Array<{ title: string; url: string; description: string }> } };
|
||||
const results = data.web?.results ?? [];
|
||||
|
||||
if (results.length === 0) return 'No results found.';
|
||||
|
||||
return results
|
||||
.map((r, i) => `${i + 1}. ${r.title}\n ${r.url}\n ${r.description ?? ''}`)
|
||||
.join('\n\n');
|
||||
} catch (err) {
|
||||
return `Error: ${String(err)}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// web_fetch
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class WebFetchTool implements Tool {
|
||||
readonly name = 'web_fetch';
|
||||
readonly description =
|
||||
'Fetch a URL and return its content. HTML pages are extracted to readable text. Use mode="raw" for JSON/XML/plain text.';
|
||||
readonly parameters = {
|
||||
url: { type: 'string', description: 'URL to fetch.' },
|
||||
mode: { type: 'string', enum: ['markdown', 'text', 'raw'], description: 'Output mode (default: text).' },
|
||||
};
|
||||
readonly required = ['url'];
|
||||
|
||||
private _proxy: string | undefined;
|
||||
|
||||
constructor(opts: { proxy?: string } = {}) {
|
||||
this._proxy = opts.proxy;
|
||||
}
|
||||
|
||||
async execute(args: Record<string, unknown>): Promise<string> {
|
||||
const url = strArg(args, 'url').trim();
|
||||
if (!url) return 'Error: url is required.';
|
||||
|
||||
const mode = strArg(args, 'mode', 'text');
|
||||
|
||||
try {
|
||||
const res = await fetchWithTimeout(url, {
|
||||
headers: { 'User-Agent': 'Mozilla/5.0 (compatible; nanobot/1.0)' },
|
||||
});
|
||||
|
||||
if (!res.ok) return `Error: HTTP ${res.status} from ${url}`;
|
||||
|
||||
const contentType = res.headers.get('content-type') ?? '';
|
||||
const body = await res.text();
|
||||
|
||||
if (mode === 'raw' || (!contentType.includes('text/html') && !body.trimStart().startsWith('<'))) {
|
||||
const truncated = body.length > MAX_CONTENT_CHARS ? body.slice(0, MAX_CONTENT_CHARS) + '\n... (truncated)' : body;
|
||||
return truncated;
|
||||
}
|
||||
|
||||
// Parse HTML with Readability
|
||||
// Readability needs a DOM — build one from node-html-parser
|
||||
const root = parseHtml(body);
|
||||
|
||||
// Minimal JSDOM-compatible interface for Readability
|
||||
// biome-ignore lint/suspicious/noExplicitAny: Readability duck-typing requires any
|
||||
const doc = makePseudoDocument(url, body, root) as any;
|
||||
const reader = new Readability(doc);
|
||||
const article = reader.parse();
|
||||
|
||||
const title = article?.title ?? '';
|
||||
const textContent = article?.textContent ?? stripTags(body);
|
||||
const trimmed = textContent.replace(/\n{3,}/g, '\n\n').trim();
|
||||
const truncated = trimmed.length > MAX_CONTENT_CHARS
|
||||
? trimmed.slice(0, MAX_CONTENT_CHARS) + '\n... (truncated)'
|
||||
: trimmed;
|
||||
|
||||
return title ? `# ${title}\n\n${truncated}` : truncated;
|
||||
} catch (err) {
|
||||
return `Error fetching ${url}: ${String(err)}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function fetchWithTimeout(url: string, init: RequestInit = {}): Promise<Response> {
|
||||
const controller = new AbortController();
|
||||
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
|
||||
return fetch(url, { ...init, signal: controller.signal }).finally(() => clearTimeout(timer));
|
||||
}
|
||||
|
||||
function stripTags(html: string): string {
|
||||
return html.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
|
||||
}
|
||||
|
||||
/** Build a minimal pseudo-document that satisfies Readability's interface. */
|
||||
function makePseudoDocument(
|
||||
url: string,
|
||||
html: string,
|
||||
root: ReturnType<typeof parseHtml>,
|
||||
): Record<string, unknown> {
|
||||
// node-html-parser's API is close enough for Readability's needs when
|
||||
// accessed via a proxy. We create a real DOMParser-like wrapper.
|
||||
// Bun/Node don't have DOMParser built-in, so we duck-type what Readability
|
||||
// needs: baseURI, documentURI, querySelector, querySelectorAll, innerHTML.
|
||||
const pseudoDoc = {
|
||||
baseURI: url,
|
||||
documentURI: url,
|
||||
URL: url,
|
||||
title: root.querySelector('title')?.text ?? '',
|
||||
documentElement: root,
|
||||
body: root.querySelector('body') ?? root,
|
||||
head: root.querySelector('head') ?? root,
|
||||
// biome-ignore lint/suspicious/noExplicitAny: Readability duck-typing
|
||||
querySelector: (sel: string) => root.querySelector(sel) as any,
|
||||
// biome-ignore lint/suspicious/noExplicitAny: Readability duck-typing
|
||||
querySelectorAll: (sel: string) => root.querySelectorAll(sel) as any,
|
||||
getElementsByTagName: (tag: string) => root.querySelectorAll(tag),
|
||||
createElement: (_tag: string) => ({ innerHTML: '', textContent: '', style: {} }),
|
||||
createTreeWalker: () => ({ nextNode: () => null }),
|
||||
createRange: () => ({ selectNodeContents: () => {}, cloneContents: () => null }),
|
||||
// biome-ignore lint/suspicious/noExplicitAny: Readability duck-typing
|
||||
get innerHTML() { return html; },
|
||||
location: { href: url },
|
||||
};
|
||||
|
||||
return pseudoDoc;
|
||||
}
|
||||
Reference in New Issue
Block a user