Source code for atloop.tools.system.run_command
"""Run command tool."""
from typing import Any, Dict, Optional
from atloop.runtime.sandbox_adapter import SandboxAdapter
from atloop.tools.base import BaseTool, ToolResult
from atloop.tools.output_semantic_type import OutputSemanticType
[docs]
class RunCommandTool(BaseTool):
    """
    Tool for executing shell commands in the sandbox workspace.

    **⚠️ This is the primary tool for executing system commands!**

    **Use cases:**
        - Running build commands (make, npm, pip, etc.)
        - Running tests (pytest, unittest, etc.)
        - Executing scripts (python3, node, etc.)
        - File operations (ls, cat, grep, find, etc.)
        - System checks (which, type, etc.)

    **Important notes:**
        - Commands run in /workspace directory
        - Success is determined by stderr content, NOT exit code
        - Many commands return exit_code=0 even with errors
        - Always check stdout and stderr content to judge success
    """

    # Directory every command is executed in.
    _WORKDIR = "/workspace"
    # Timeout (seconds) used when the caller does not pass ``timeout_sec``.
    _DEFAULT_TIMEOUT_SEC = 600

    def __init__(self, sandbox: SandboxAdapter):
        """
        Initialize run command tool.

        Args:
            sandbox: Sandbox adapter instance used to execute the commands.
        """
        self.sandbox = sandbox

    @property
    def name(self) -> str:
        """Tool name."""
        return "run"

    @property
    def description(self) -> str:
        """Tool description."""
        return (
            "Execute shell commands in the sandbox workspace. "
            "This is the primary tool for executing system commands. "
            "Use cases: running build commands (make, npm, pip), running tests (pytest, unittest), "
            "executing scripts (python3, node), file operations (ls, cat, grep, find), system checks (which, type). "
            "Commands run in /workspace directory. "
            "⚠️ IMPORTANT: Success is determined by stderr content, NOT exit code. "
            "Many commands return exit_code=0 even with errors. Always check stdout and stderr content to judge success. "
            "For complex shell scripts, prefer `run_shell_script_string`. "
            "For Python code, prefer `run_python_script_string`. "
            "All `run` commands MUST use SHELL_COMMAND_<description> placeholder in JSON."
        )

    @property
    def output_semantic_type(self) -> OutputSemanticType:
        """Return semantic type: EXECUTION_RESULT.

        Note: File view commands are handled specially in OutputLimitStrategy
        based on command content, not semantic type.
        """
        return OutputSemanticType.EXECUTION_RESULT

    def validate_args(self, args: Dict[str, Any]) -> tuple[bool, Optional[str]]:
        """
        Validate arguments.

        Args:
            args: Tool arguments dictionary; must contain a non-empty string ``cmd``.

        Returns:
            ``(True, None)`` when the arguments are usable, otherwise
            ``(False, message)`` where ``message`` describes the problem.
        """
        if "cmd" not in args:
            return False, "Missing required argument: 'cmd'"
        cmd = args["cmd"]
        if not isinstance(cmd, str):
            return False, "Argument 'cmd' must be a string"
        # A blank command runs nothing and would be reported as a success,
        # so reject it up front with a clear message.
        if not cmd.strip():
            return False, "Argument 'cmd' must not be empty"
        return True, None

    def execute(self, args: Dict[str, Any]) -> ToolResult:
        """
        Execute shell command in the sandbox workspace.

        **⚠️ IMPORTANT**: This is the primary tool for executing system commands!
        Use this instead of trying to execute commands through other means.

        **Args:**
            args: Tool arguments dictionary
                - cmd (str, required): Shell command to execute.
                  Examples: "ls -la", "python3 script.py", "grep -rn 'pattern' .", etc.
                - timeout_sec (int, optional): Timeout in seconds. Default: 600 (10 minutes).
                  Use shorter timeouts for quick commands, longer for builds/tests.

        **Returns:**
            ToolResult with:
                - ok (bool): True if stderr is empty or whitespace-only
                - stdout (str): Standard output from the command
                - stderr (str): Standard error from the command
                - meta (dict): Contains cmd, duration_ms and exitCode
                  (exitCode is -1 when the sandbox result carries no exit code)

        **Examples:**
            # List files
            run(cmd="ls -la")

            # Run Python script
            run(cmd="python3 script.py")

            # Run tests
            run(cmd="pytest tests/")

            # Search for pattern
            run(cmd="grep -rn 'def function' .")

            # View file content
            run(cmd="cat file.py")
            run(cmd="head -n 50 file.py")
            run(cmd="tail -n 20 file.py")

        **⚠️ Critical: Success Determination**
            - **DO NOT rely on exit_code** - it's unreliable!
            - **Success = empty stderr** (no error messages)
            - **Failure = non-empty stderr** (contains error messages)
            - Many commands return exit_code=0 even with errors (e.g., pytest collection errors)
            - Always read stdout and stderr content to judge success

        **⚠️ Hard requirement: avoid "triple-layer quoting hell"**
            If you use python3 -c "....", you must use single quotes inside the
            string following "-c".
            Example:
                {
                    "tool": "run",
                    "args": {
                        "cmd": "python3 -c \\"import sys; print('python', sys.version_info[0])\\""
                    }
                }

        **Common Commands:**
            - File viewing: `cat`, `head`, `tail`, `less`, `more`
            - File search: `grep`, `find`, `locate`
            - Text processing: `sed`, `awk`, `cut`, `sort`, `uniq`
            - File operations: `ls`, `pwd`, `cd`, `mkdir`, `rm`, `cp`, `mv`
            - Python: `python3` (not `python`), `pip3` (not `pip`)
            - Other: `echo`, `wc`, `diff`, `which`, `type`

        **Error Handling:**
            - Success is determined by stderr content, not exit code
            - If stderr is empty, the command is reported as succeeded
            - If stderr contains "error", "failed", "exception", etc., command likely failed
            - Always check stderr content, not just ok field

        **⚠️ IMPORTANT: For Complex Python/Shell Scripts, Use Specialized Tools**
            DO NOT use `run` with `python3 -c "..."` or `bash -c "..."` for complex scripts!
            This causes shell escaping issues (quotes, f-strings, etc.).
            Instead, use:
                - `run_python_script_string` for Python code execution (use PYTHON_SCRIPT_#N placeholder)
                - `run_shell_script_string` for complex shell scripts (use SHELL_SCRIPT_#N placeholder)
            Use `run` only for simple commands: ls, cat, grep, find, pwd, python3 script.py, etc.
        """
        cmd = args["cmd"]
        timeout_sec = args.get("timeout_sec", self._DEFAULT_TIMEOUT_SEC)

        result = self.sandbox.exec_shell(
            command=cmd,
            workdir=self._WORKDIR,
            timeout_seconds=timeout_sec,
        )

        # `or ""` (rather than a .get default) also covers a key that is
        # present but None, which would otherwise crash on .strip() below.
        stdout = result.get("stdout") or ""
        stderr = result.get("stderr") or ""
        # Accept both camelCase and snake_case keys; -1 marks "not reported".
        exit_code = result.get("exitCode", result.get("exit_code", -1))

        # Determine success based on stderr content, not exit code.
        # Many commands return exit_code=0 even with errors, or exit_code!=0 with no errors.
        ok = not stderr.strip()

        return ToolResult(
            ok=ok,
            stdout=stdout,
            stderr=stderr,
            meta={
                "cmd": cmd,
                "duration_ms": result.get("durationMs", 0),
                "exitCode": exit_code,  # Include exit code for proper display
            },
        )