Source code for atloop.tools.search.search

"""Enhanced search tool using grep with regex, context lines, and file filtering."""

import shlex
from typing import Any, Dict, Optional

from atloop.runtime.sandbox_adapter import SandboxAdapter
from atloop.tools.base import BaseTool, ToolResult


class SearchTool(BaseTool):
    """
    Search file contents inside the sandbox using recursive ``grep``.

    **Features:**
        - Extended regular expressions (``grep -E``)
        - Context lines before/after matches
        - File filtering via ``grep --include``
        - Three output modes: matching lines, file paths only, match counts
        - Optional case-insensitive matching

    **Use cases:**
        - Finding function definitions: `search('def function_name')`
        - Finding imports: `search('^import |^from ')`
        - Finding specific patterns: `search('TODO|FIXME')`
        - Searching in specific file types: `search('pattern', glob='*.py')`
    """

    # Accepted values for the ``output_mode`` argument.
    _OUTPUT_MODES = ("content", "files_with_matches", "count")

    # Arguments that are forwarded to grep as line counts.
    _CONTEXT_FLAGS = ("-A", "-B", "-C")

    def __init__(self, sandbox: SandboxAdapter):
        """
        Initialize the search tool.

        Args:
            sandbox: Sandbox adapter instance used to run the grep command.
        """
        self.sandbox = sandbox

    @property
    def name(self) -> str:
        """Tool name."""
        return "search"

    @property
    def description(self) -> str:
        """Tool description (user-facing text, returned verbatim)."""
        return (
            "强大的文件内容搜索工具(支持正则表达式、上下文行、文件过滤)\n"
            " 参数: query (string): 搜索查询(正则表达式,必需)\n"
            " glob (string, 可选): 文件过滤模式(如 '*.py', '**/*.js')\n"
            " output_mode (string, 可选): 输出模式 - 'content'(显示匹配行,默认)、'files_with_matches'(仅文件路径)、'count'(匹配计数)\n"
            " -A (int, 可选): 显示匹配行后的行数\n"
            " -B (int, 可选): 显示匹配行前的行数\n"
            " -C (int, 可选): 显示匹配行前后的行数\n"
            " -i (bool, 可选): 忽略大小写(默认 false)\n"
            " -n (bool, 可选): 显示行号(默认 true)\n"
            " max_results (int, 可选): 最大结果数(默认 50)"
        )

    def validate_args(self, args: Dict[str, Any]) -> tuple[bool, Optional[str]]:
        """
        Validate tool arguments.

        Args:
            args: Tool arguments dictionary (see :meth:`execute` for the keys).

        Returns:
            ``(True, None)`` when the arguments are acceptable, otherwise
            ``(False, message)`` describing the first problem found.
        """
        if "query" not in args:
            return False, "Missing required argument: 'query'"
        if not isinstance(args["query"], str):
            return False, "Argument 'query' must be a string"

        # execute() calls string methods on the glob, so reject other types here.
        if args.get("glob") is not None and not isinstance(args["glob"], str):
            return False, "Argument 'glob' must be a string"

        if "output_mode" in args and args["output_mode"] not in self._OUTPUT_MODES:
            return (
                False,
                "Argument 'output_mode' must be one of: 'content', 'files_with_matches', 'count'",
            )

        for flag in self._CONTEXT_FLAGS:
            if flag not in args:
                continue
            value = args[flag]
            # bool is a subclass of int; True/False are not meaningful line counts.
            if isinstance(value, bool) or not isinstance(value, int):
                return False, f"Argument '{flag}' must be an integer"
            if value < 0:
                return False, f"Argument '{flag}' must be a non-negative integer"

        if "-i" in args and not isinstance(args["-i"], bool):
            return False, "Argument '-i' must be a boolean"
        if "-n" in args and not isinstance(args["-n"], bool):
            return False, "Argument '-n' must be a boolean"

        if "max_results" in args:
            limit = args["max_results"]
            if isinstance(limit, bool) or not isinstance(limit, int):
                return False, "Argument 'max_results' must be an integer"
            # A negative value would become `head -n -N`, which does not cap output.
            if limit < 0:
                return False, "Argument 'max_results' must be a non-negative integer"

        return True, None

    @staticmethod
    def _include_pattern(glob: str) -> str:
        """
        Convert a user-supplied glob into a pattern for ``grep --include``.

        Leading ``./`` prefixes are removed as whole prefixes (not as a
        character set, so dotfile globs such as ``.env*`` are left intact) and
        ``**/`` segments are dropped because the search is already recursive.
        """
        pattern = glob
        while pattern.startswith("./"):
            pattern = pattern[2:]
        return pattern.replace("**/", "")

    def execute(self, args: Dict[str, Any]) -> ToolResult:
        """
        Build and run a recursive ``grep`` pipeline in the sandbox.

        **Args:**
            args: Tool arguments dictionary
                - query (str, required): Extended-regex pattern.
                  Examples: "def function", "^import ", "TODO|FIXME", "class \\w+"
                - glob (str, optional): File filter passed to ``--include``.
                  Examples: "*.py", "**/*.js"
                - output_mode (str, optional): Default "content".
                    - "content": matching lines (with optional context)
                    - "files_with_matches": only paths of files with matches
                    - "count": match count per file
                - -A (int, optional): Lines to show after each match
                - -B (int, optional): Lines to show before each match
                - -C (int, optional): Lines to show before and after each
                  match; when given, -A and -B are ignored
                - -i (bool, optional): Case-insensitive search. Default: False
                - -n (bool, optional): Show line numbers. Default: True
                - max_results (int, optional): Maximum number of output lines
                  kept (via ``head -n``). Default: 50

            Context and line-number options are only applied in "content" mode.

        **Returns:**
            ToolResult with:
                - ok (bool): True when the stderr reported by the sandbox is
                  empty or whitespace only; the exit code is not consulted
                - stdout (str): Search output (lines, file paths, or counts)
                - stderr (str): stderr reported by the sandbox
                - meta (dict): query, glob, output_mode, max_results, cmd

        **Examples:**
            # Find function definitions
            search(query="def \\w+", glob="*.py")

            # Find imports with context
            search(query="^import |^from ", glob="*.py", -C=2)

            # Find files containing a pattern (without showing content)
            search(query="def main", output_mode="files_with_matches")

            # Count matches per file
            search(query="class \\w+", output_mode="count")

        **Error handling:**
            - grep's own stderr is redirected to /dev/null, so grep diagnostics
              (for example an invalid regex) never reach the returned stderr
              and do not influence ``ok``
            - No matches yields ok=True with empty stdout
        """
        query = args["query"]
        glob = args.get("glob")
        output_mode = args.get("output_mode", "content")
        before = args.get("-B")
        after = args.get("-A")
        context = args.get("-C")
        case_insensitive = args.get("-i", False)
        line_numbers = args.get("-n", True)
        max_results = args.get("max_results", 50)

        cmd_parts = ["grep", "-r"]

        if case_insensitive:
            cmd_parts.append("-i")

        # Line numbers and context only make sense when matching lines are shown.
        if output_mode == "content":
            if line_numbers:
                cmd_parts.append("-n")
            # Numeric values are quoted defensively in case validation was skipped.
            if context is not None:
                cmd_parts.extend(["-C", shlex.quote(str(context))])
            else:
                if before is not None:
                    cmd_parts.extend(["-B", shlex.quote(str(before))])
                if after is not None:
                    cmd_parts.extend(["-A", shlex.quote(str(after))])

        if glob:
            include_pattern = self._include_pattern(glob)
            # A glob such as "./" reduces to nothing; an empty --include would
            # match no file at all, so the filter is skipped in that case.
            if include_pattern:
                cmd_parts.extend(["--include", shlex.quote(include_pattern)])

        if output_mode == "files_with_matches":
            cmd_parts.append("-l")
        elif output_mode == "count":
            cmd_parts.append("-c")

        # Pass the pattern with -e: shell quoting alone does not stop a query
        # that starts with "-" from being parsed by grep as an option.
        cmd_parts.extend(["-E", "-e", shlex.quote(query), "."])

        # The same line cap applies to every output mode.
        # NOTE(review): grep's stderr is discarded here, so grep failures are
        # indistinguishable from "no matches" — confirm this is intended.
        cmd = (
            f"{' '.join(cmd_parts)} 2>/dev/null"
            f" | head -n {shlex.quote(str(max_results))}"
        )

        result = self.sandbox.exec_shell(
            command=cmd,
            workdir="/workspace",
            timeout_seconds=60,
        )

        # Success is decided by stderr content, not by the exit code.
        stderr = result.get("stderr", "")
        ok = not bool(stderr.strip())

        return ToolResult(
            ok=ok,
            stdout=result.get("stdout", ""),
            stderr=stderr,
            meta={
                "query": query,
                "glob": glob,
                "output_mode": output_mode,
                "max_results": max_results,
                "cmd": cmd,
            },
        )