Source code for atloop.tools.search.search
"""Enhanced search tool using grep with regex, context lines, and file filtering."""
import shlex
from typing import Any, Dict, Optional
from atloop.runtime.sandbox_adapter import SandboxAdapter
from atloop.tools.base import BaseTool, ToolResult
[docs]
class SearchTool(BaseTool):
    """Search file contents in the sandbox workspace with recursive ``grep -E``.

    **Features:**
    - Extended-regex (ERE) patterns
    - Context lines (before/after matches)
    - File filtering by glob pattern (forwarded to ``grep --include``)
    - Multiple output modes (content, files, count)
    - Case-insensitive search option

    **Use cases:**
    - Finding function definitions: ``{"query": "def function_name"}``
    - Finding imports: ``{"query": "^import |^from "}``
    - Finding specific patterns: ``{"query": "TODO|FIXME"}``
    - Searching in specific file types: ``{"query": "pattern", "glob": "*.py"}``
    """

    def __init__(self, sandbox: SandboxAdapter):
        """Store the sandbox adapter used to run the grep command.

        Args:
            sandbox: Sandbox adapter instance; ``execute`` calls its
                ``exec_shell`` method.
        """
        self.sandbox = sandbox

    @property
    def name(self) -> str:
        """Tool name."""
        return "search"

    @property
    def description(self) -> str:
        """Tool description (runtime text, reproduced verbatim)."""
        return "强大的文件内容搜索工具(支持正则表达式、上下文行、文件过滤)\n 参数: query (string): 搜索查询(正则表达式,必需)\n glob (string, 可选): 文件过滤模式(如 '*.py', '**/*.js')\n output_mode (string, 可选): 输出模式 - 'content'(显示匹配行,默认)、'files_with_matches'(仅文件路径)、'count'(匹配计数)\n -A (int, 可选): 显示匹配行后的行数\n -B (int, 可选): 显示匹配行前的行数\n -C (int, 可选): 显示匹配行前后的行数\n -i (bool, 可选): 忽略大小写(默认 false)\n -n (bool, 可选): 显示行号(默认 true)\n max_results (int, 可选): 最大结果数(默认 50)"

    def validate_args(self, args: Dict[str, Any]) -> tuple[bool, Optional[str]]:
        """Check the argument dictionary before ``execute`` is called.

        Args:
            args: Tool arguments (see ``execute`` for the accepted keys).

        Returns:
            ``(True, None)`` when the arguments are acceptable, otherwise
            ``(False, message)`` describing the first problem found.
        """
        if "query" not in args:
            return False, "Missing required argument: 'query'"
        if not isinstance(args["query"], str):
            return False, "Argument 'query' must be a string"

        # ``execute`` calls string methods on the glob, so anything else
        # would crash there instead of producing a validation message.
        glob = args.get("glob")
        if glob is not None and not isinstance(glob, str):
            return False, "Argument 'glob' must be a string"

        if "output_mode" in args and args["output_mode"] not in (
            "content",
            "files_with_matches",
            "count",
        ):
            return (
                False,
                "Argument 'output_mode' must be one of: 'content', 'files_with_matches', 'count'",
            )

        for key in ("-A", "-B", "-C"):
            if key not in args:
                continue
            value = args[key]
            # bool is a subclass of int; ``True`` would be rendered as the
            # literal text "True" on the grep command line.
            if isinstance(value, bool) or not isinstance(value, int):
                return False, f"Argument '{key}' must be an integer"
            if value < 0:
                return False, f"Argument '{key}' must be a non-negative integer"

        for key in ("-i", "-n"):
            if key in args and not isinstance(args[key], bool):
                return False, f"Argument '{key}' must be a boolean"

        if "max_results" in args:
            limit = args["max_results"]
            if isinstance(limit, bool) or not isinstance(limit, int):
                return False, "Argument 'max_results' must be an integer"
            # A negative count changes the meaning of ``head -n`` (GNU head
            # prints "all but the last N lines"); zero would always look like
            # "no matches".
            if limit < 1:
                return False, "Argument 'max_results' must be a positive integer"

        return True, None

    @staticmethod
    def _include_pattern(glob: str) -> str:
        """Turn a user glob into the pattern handed to ``grep --include``.

        Leading ``./`` and ``/`` prefixes are removed (as a prefix, not as a
        character set, so dot-files such as ``.env`` keep their dot) and
        ``**/`` segments are dropped because ``grep -r`` already recurses.
        """
        pattern = glob
        while pattern.startswith(("./", "/")):
            pattern = pattern[2:] if pattern.startswith("./") else pattern[1:]
        return pattern.replace("**/", "")

    @staticmethod
    def _context_flags(args: Dict[str, Any]) -> list:
        """Return the ``-C`` or ``-B``/``-A`` grep flags requested in *args*.

        ``-C`` takes precedence; ``-B`` and ``-A`` are only used when ``-C``
        is absent. Values are shell-quoted so they stay a single token even
        if ``execute`` is reached with unvalidated input.
        """
        around = args.get("-C")
        if around is not None:
            return ["-C", shlex.quote(str(around))]

        flags = []
        for flag in ("-B", "-A"):
            amount = args.get(flag)
            if amount is not None:
                flags.extend([flag, shlex.quote(str(amount))])
        return flags

    def execute(self, args: Dict[str, Any]) -> ToolResult:
        """Run a recursive ``grep -E`` in ``/workspace`` and return its output.

        Args:
            args: Tool arguments dictionary.
                - query (str, required): Extended-regex pattern, e.g.
                  ``"def function"``, ``"^import "``, ``"TODO|FIXME"``,
                  ``"class \\w+"``.
                - glob (str, optional): File filter such as ``"*.py"`` or
                  ``"**/*.js"``; forwarded to ``grep --include``.
                - output_mode (str, optional): ``"content"`` (matching lines,
                  default), ``"files_with_matches"`` (``grep -l``) or
                  ``"count"`` (``grep -c``).
                - -A / -B / -C (int, optional): Lines of context after /
                  before / around each match. Only used in ``"content"``
                  mode; ``-C`` wins over ``-A``/``-B``.
                - -i (bool, optional): Case-insensitive search. Default False.
                - -n (bool, optional): Show line numbers (``"content"`` mode
                  only). Default True.
                - max_results (int, optional): Maximum number of output lines
                  kept (``head -n``). Default 50.

        Returns:
            ToolResult with:
                - ok: True when the sandbox reported no stderr text.
                - stdout: Matching lines, file paths, or per-file counts.
                - stderr: Whatever stderr the sandbox reported.
                - meta: ``query``, ``glob``, ``output_mode``, ``max_results``
                  and the exact shell ``cmd`` that was run.

        Raises:
            KeyError: If ``"query"`` is missing from *args*.

        Notes:
            grep's own stderr is redirected to ``/dev/null``, so "no matches"
            and grep-level failures both come back as empty stdout with
            ``ok=True``; only stderr from the rest of the pipeline can clear
            ``ok``.
            NOTE(review): an invalid regex is therefore indistinguishable
            from "no matches" -- consider surfacing grep diagnostics once it
            is confirmed the sandbox does not add spurious stderr noise.
        """
        query = args["query"]
        glob = args.get("glob")
        output_mode = args.get("output_mode", "content")
        max_results = args.get("max_results", 50)

        cmd_parts = ["grep", "-r"]

        if args.get("-i", False):
            cmd_parts.append("-i")

        # Line numbers and context only make sense when lines are printed.
        if output_mode == "content":
            if args.get("-n", True):
                cmd_parts.append("-n")
            cmd_parts.extend(self._context_flags(args))

        if glob:
            include = self._include_pattern(glob)
            # A glob that normalizes to nothing (e.g. "./") is not a filter.
            if include:
                cmd_parts.extend(["--include", shlex.quote(include)])

        if output_mode == "files_with_matches":
            cmd_parts.append("-l")
        elif output_mode == "count":
            cmd_parts.append("-c")

        # ``-e`` marks the next token as the pattern even when it starts with
        # "-", so queries such as "-n" or "--help" are not parsed as options.
        cmd_parts.extend(["-E", "-e", shlex.quote(query)])
        cmd_parts.append(".")

        # Every output mode is capped the same way: keep the first N lines.
        limit = shlex.quote(str(max_results))
        cmd = f"{' '.join(cmd_parts)} 2>/dev/null | head -n {limit}"

        result = self.sandbox.exec_shell(
            command=cmd,
            workdir="/workspace",
            timeout_seconds=60,
        )

        # Success is judged from stderr rather than the exit code: grep exits
        # with status 1 when nothing matches, which is not a failure here.
        stderr = result.get("stderr") or ""
        return ToolResult(
            ok=not stderr.strip(),
            stdout=result.get("stdout") or "",
            stderr=stderr,
            meta={
                "query": query,
                "glob": glob,
                "output_mode": output_mode,
                "max_results": max_results,
                "cmd": cmd,
            },
        )