diff --git a/cross_eval/agent.py b/cross_eval/agent.py
index b52acf8..f97d1f7 100644
--- a/cross_eval/agent.py
+++ b/cross_eval/agent.py
@@ -419,8 +419,11 @@ def invoke_agent_agentic(
     if agent.reasoning_effort and _supports_reasoning_effort(agent.command):
         cmd.extend(["-c", f'model_reasoning_effort="{agent.reasoning_effort}"'])
 
-    # Strip stdin sentinel ("-") from args for agentic mode
-    args = [a for a in agent.args if a != "-"]
+    # Strip print-mode flags and stdin sentinel from args for agentic mode.
+    # -p / --print makes Claude a one-shot text completer that cannot use tools
+    # or modify files, which defeats the entire purpose of agentic execution.
+    _STRIP_FOR_AGENTIC = {"-", "-p", "--print"}
+    args = [a for a in agent.args if a not in _STRIP_FOR_AGENTIC]
     cmd.extend(args)
 
     # System prompt via flag if supported