diff --git a/cross_eval/agent.py b/cross_eval/agent.py index 3844520..968e79e 100644 --- a/cross_eval/agent.py +++ b/cross_eval/agent.py @@ -28,6 +28,12 @@ _NO_CHANGE_ACK_MARKERS = ( "no modifications were necessary", "no update was necessary", "already satisfied", + "no changes needed", + "no fixes needed", + "everything is correct", + "code is correct as-is", + "already correct", + "no action required", ) _CHANGE_CLAIM_MARKERS = ( "summary of all changes made", @@ -59,6 +65,14 @@ _CHANGE_CLAIM_MARKERS = ( "wrote the code", "refactored", "i refactored", + "completed all the changes", + "finished implementing", + "all tasks completed", + "done with the implementation", + "successfully implemented", + "completed the implementation", + "all changes have been made", + "changes are complete", ) @@ -419,11 +433,10 @@ def invoke_agent_agentic( if agent.reasoning_effort and _supports_reasoning_effort(agent.command): cmd.extend(["-c", f'model_reasoning_effort="{agent.reasoning_effort}"']) - # Strip print-mode flags and stdin sentinel from args for agentic mode. - # -p / --print makes Claude a one-shot text completer that cannot use tools - # or modify files, which defeats the entire purpose of agentic execution. - _STRIP_FOR_AGENTIC = {"-", "-p", "--print"} - args = [a for a in agent.args if a not in _STRIP_FOR_AGENTIC] + # Strip stdin sentinel ("-") from args for agentic mode. + # Keep -p/--print: Claude -p mode still has full tool access (Edit, Write, + # Bash, etc.) and is the correct mode for non-interactive subprocess use. + args = [a for a in agent.args if a != "-"] cmd.extend(args) # System prompt via flag if supported @@ -441,16 +454,9 @@ def invoke_agent_agentic( else: input_data = prompt else: - # claude: use positional arg with a pointer to the task file - # (avoids OS arg length limits for large prompts). - # Send empty stdin so Claude receives EOF and exits after processing - # instead of hanging in interactive mode waiting for more input. - input_data = "" - cmd.append( - f"Read the task file at {task_file} and execute all instructions in it. " - f"Work only inside the current directory and do not modify files " - f"outside it." - ) + # claude -p: deliver prompt via stdin (same as codex). + # -p mode is non-interactive and reads from stdin, then exits. + input_data = prompt cmd_preview = " ".join(cmd[:6]) logger.debug(