Source code for atloop.tools.filesystem.read_file

"""Read file tool with enhanced capabilities."""

import shlex
from typing import Any, Dict, Optional

from atloop.runtime.sandbox_adapter import SandboxAdapter
from atloop.tools.base import BaseTool, ToolResult
from atloop.tools.output_semantic_type import OutputSemanticType


[docs] class ReadFileTool(BaseTool): """ Tool for reading files from the sandbox workspace with type detection and chunked reading. **Features:** - Automatic file type detection (text/binary) - Support for large files via line range reading - Binary file detection and handling - File size limits (10MB max for full read) **Use cases:** - Reading source code files - Reading configuration files - Reading documentation files - Reading log files (with line ranges) **Note**: This tool reads from the sandbox workspace (/workspace), not from the local machine. For reading skill files (stored locally), use `read_skill_file` instead. """
[docs] def __init__(self, sandbox: SandboxAdapter): """ Initialize read file tool. Args: sandbox: Sandbox adapter instance """ self.sandbox = sandbox
@property def name(self) -> str: """Tool name.""" return "read_file" @property def description(self) -> str: """Tool description.""" return ( "Read files from the sandbox workspace with type detection and chunked reading. " "Features: automatic file type detection (text/binary), support for large files via line range reading, " "binary file detection and handling, file size limits (10MB max for full read). " "Use cases: reading source code files, configuration files, documentation files, log files (with line ranges). " "⚠️ Note: This tool reads from the sandbox workspace (/workspace), NOT from the local machine. " "For reading skill files (stored locally), use `read_skill_file` instead. " "Prefer this tool over `run(\"cat\")` for file reading." ) @property def output_semantic_type(self) -> OutputSemanticType: """Return semantic type: FILE_CONTENT.""" return OutputSemanticType.FILE_CONTENT
[docs] def validate_args(self, args: Dict[str, Any]) -> tuple[bool, Optional[str]]: """Validate arguments.""" if "path" not in args: return False, "Missing required argument: 'path'" if not isinstance(args["path"], str): return False, "Argument 'path' must be a string" if "offset" in args and not isinstance(args.get("offset"), int): return False, "Argument 'offset' must be an integer" if "limit" in args and not isinstance(args.get("limit"), int): return False, "Argument 'limit' must be an integer" return True, None
[docs] def execute(self, args: Dict[str, Any]) -> ToolResult: """ Execute read file tool. **Args:** args: Tool arguments dictionary - path (str, required): File path. Relative paths are relative to /workspace. Absolute paths are used as-is. - offset (int, optional): Start line number (1-indexed). Default: 1 (start of file). Use this for reading large files in chunks. - limit (int, optional): Number of lines to read. If not specified, reads from offset to end of file. Use this for reading large files in chunks. **Returns:** ToolResult with: - ok (bool): True if file was read successfully (no errors in stderr) - stdout (str): File content (or metadata for binary/large files) - stderr (str): Error messages if any (empty string means success) - meta (dict): Contains path, file_type, file_size, start_line, end_line **Examples:** # Read entire file read_file(path="src/main.py") # Read first 100 lines read_file(path="src/main.py", offset=1, limit=100) # Read lines 50-100 read_file(path="src/main.py", offset=50, limit=51) # Read from line 200 to end read_file(path="src/main.py", offset=200) **Behavior:** - **Text files**: Returns file content in stdout - **Binary files**: Returns metadata message (file type, size) instead of content - **Large files (>10MB)**: Returns metadata message, suggests using line ranges - **File not found**: Returns ok=False with error message in stderr - **Line ranges**: If offset/limit specified, only reads those lines **Path Handling:** - All commands run with workdir="/workspace" - Relative paths are resolved relative to /workspace - Absolute paths are used as-is **Error Handling:** - Success is determined by stderr content, not exit code - If stderr is empty, the operation succeeded - Check stderr for specific error messages if ok=False """ path = args["path"] offset = args.get("offset") limit = args.get("limit") # Handle paths - sandbox runs in /workspace directory # Relative paths are already relative to /workspace (consistent with other tools) # Use relative path directly since workdir is /workspace path_escaped = shlex.quote(path) # Check if file exists (relative to /workspace) check_cmd = f"test -f {path_escaped} && echo 'exists' || echo 'not_found'" check_result = self._run_command(check_cmd, timeout_sec=5) if "not_found" in check_result["stdout"]: return ToolResult( ok=False, stdout="", stderr=f"File not found: {path}", meta={"path": path, "file_type": "not_found"}, ) # Detect file type detect_cmd = f"file -b {path_escaped} 2>/dev/null || echo 'unknown'" file_type_result = self._run_command(detect_cmd, timeout_sec=5) file_type_info = ( file_type_result["stdout"].strip() if not file_type_result.get("stderr", "").strip() else "unknown" ) # Check file size size_cmd = f"wc -c < {path_escaped} 2>/dev/null || echo '0'" size_result = self._run_command(size_cmd, timeout_sec=5) try: file_size = int(size_result["stdout"].strip()) except (ValueError, AttributeError): file_size = 0 # Check if binary is_binary = False if file_size > 0: check_binary_cmd = f"head -c 512 {path_escaped} 2>/dev/null | od -An -tx1 | grep -q ' 00 ' && echo 'binary' || echo 'text'" binary_check = self._run_command(check_binary_cmd, timeout_sec=5) if not binary_check.get("stderr", "").strip(): is_binary = "binary" in binary_check["stdout"] else: is_binary = ( "text" not in file_type_info.lower() and "ascii" not in file_type_info.lower() ) # If binary or too large, return metadata only max_file_size = 10 * 1024 * 1024 # 10MB is_text_file = ( "text" in file_type_info.lower() or "ascii" in file_type_info.lower() or "python" in file_type_info.lower() or "script" in file_type_info.lower() ) if (is_binary and not is_text_file) or file_size > max_file_size: return ToolResult( ok=True, stdout=f"[File: {path}]\nType: {file_type_info}\nSize: {file_size} bytes\n\nThis file is binary or too large to display. Use specific line ranges if needed.", stderr="", meta={ "path": path, "file_type": "binary" if is_binary else "large", "file_size": file_size, "file_type_info": file_type_info, }, ) # Determine line range start_line = offset if offset is not None else 1 if limit is not None: end_line = start_line + limit - 1 if start_line > 0 else limit else: end_line = None # Read file content if end_line is None: # Read from start_line to end of file if start_line == 1: cmd = f"cat {path_escaped} 2>/dev/null" else: # Use sed to read from start_line to end cmd = f"sed -n '{start_line},$p' {path_escaped} 2>/dev/null" else: cmd = f"sed -n '{start_line},{end_line}p' {path_escaped} 2>/dev/null || head -n {end_line} {path_escaped} | tail -n +{start_line}" result = self._run_command(cmd, timeout_sec=30) # Determine success based on stderr content, not exit code stderr = result.get("stderr", "") exit_code = result.get("exitCode", result.get("exit_code", -1)) ok = not bool(stderr.strip()) # Success if no error messages in stderr return ToolResult( ok=ok, stdout=result.get("stdout", ""), stderr=stderr, meta={ "path": path, "file_type": "text", "exitCode": exit_code, # Include exit code for proper display "file_size": file_size, "file_type_info": file_type_info, "start_line": start_line, "end_line": end_line, }, )
def _run_command(self, cmd: str, timeout_sec: int = 600) -> Dict[str, Any]: """Run a shell command in sandbox.""" return self.sandbox.exec_shell( command=cmd, workdir="/workspace", timeout_seconds=timeout_sec, )