release: cut 0.2.0 baseline

This commit is contained in:
chungyeong
2026-03-13 21:47:54 +09:00
parent 204e071b74
commit 941304398d
15 changed files with 1930 additions and 270 deletions

View File

@@ -10,9 +10,11 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
from cross_eval.agent import AgentInvocationError, invoke_agent
from cross_eval.agent import AgentInvocationError, invoke_agent, invoke_agent_agentic
from cross_eval.worktree import WorktreeError
from cross_eval.config import try_reload_config
from cross_eval.models import (
AgentConfig,
AgentResult,
IterationResult,
PipelineConfig,
@@ -21,6 +23,11 @@ from cross_eval.models import (
)
from cross_eval.prompts import render_template, resolve_template, set_language
from cross_eval.report import build_report
from cross_eval.runtime_env import (
build_execution_policy,
build_runtime_environment,
summarize_environment,
)
logger = logging.getLogger(__name__)
@@ -48,6 +55,104 @@ def _make_run_dir(config: PipelineConfig) -> Path:
return run_dir
def _commit_iteration(
worktree_path: Path,
label: str,
iteration: int,
verdict: str | None,
) -> None:
"""Intermediate commit after each agentic iteration.
This resets the diff baseline so the next iteration only captures new changes.
"""
from cross_eval.worktree import commit_worktree
committed = commit_worktree(
worktree_path,
f"cross-eval: {label} v{iteration} ({verdict or 'no-verdict'})",
)
if committed:
logger.debug(" Intermediate commit: v%d (%s)", iteration, verdict)
def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
"""Check if any step uses an agentic agent."""
return any(
config.agents.get(s.agent, AgentConfig(name="", command="")).agentic
for s in steps
)
def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, str]:
"""Create a shared worktree for the entire pipeline run.
1. Generate branch name (cross-eval/<preset>_<timestamp>)
2. Create branch from HEAD
3. Create worktree on that branch
Returns (worktree_path, branch_name).
"""
from cross_eval.worktree import create_worktree, make_branch_name
branch_name = make_branch_name(preset_name)
worktree_dir = run_dir / "work"
worktree_path = create_worktree(
base_cwd=cwd, work_dir=worktree_dir, branch_name=branch_name,
)
return worktree_path, branch_name
def _finalize_worktree(
cwd: Path,
worktree_path: Path,
branch_name: str,
preset_name: str,
final_verdict: str,
) -> str | None:
"""Commit changes on the branch, then remove the worktree.
The branch survives worktree removal and stays in the original repo.
Returns the branch name if changes were committed, None otherwise.
"""
from cross_eval.worktree import commit_worktree, remove_worktree
committed = False
try:
committed = commit_worktree(
worktree_path,
f"cross-eval: {preset_name} ({final_verdict})",
)
if committed:
logger.info(" Agentic changes committed on branch: %s", branch_name)
else:
logger.warning(" No agentic changes to commit (empty diff)")
except Exception:
logger.warning(" Failed to commit agentic changes", exc_info=True)
try:
remove_worktree(base_cwd=cwd, work_dir=worktree_path)
except Exception:
logger.warning("Failed to clean up worktree: %s", worktree_path)
# Check if branch has any commits beyond the base — if not, delete it
if not committed:
try:
# Check if branch has diverged from its base
result = subprocess.run(
["git", "log", "--oneline", f"HEAD..{branch_name}"],
cwd=cwd, capture_output=True, text=True,
)
if not result.stdout.strip():
# No commits on branch beyond base — clean up
subprocess.run(
["git", "branch", "-D", branch_name],
cwd=cwd, capture_output=True,
)
logger.info(" Deleted empty branch: %s", branch_name)
except Exception:
pass # best-effort cleanup
return branch_name if committed else None
def _run_simple_pipeline(
config: PipelineConfig,
run_dir: Path,
@@ -61,6 +166,15 @@ def _run_simple_pipeline(
set_language(config.language)
input_contents = _load_inputs(config)
runtime_env = _build_runtime_inputs(config, input_contents, cwd or Path(os.getcwd()))
# Setup shared worktree for agentic mode
worktree_path: Path | None = None
agentic_branch_name: str | None = None
if not dry_run and _has_agentic_steps(config, config.pipeline):
worktree_path, agentic_branch_name = _setup_worktree(
cwd, run_dir, config.preset_name,
)
feedback = "(no feedback — first iteration)"
iterations: list[IterationResult] = []
@@ -71,99 +185,114 @@ def _run_simple_pipeline(
escalated_issues: list[str] = []
all_feedbacks: list[str] = []
for i in range(1, config.max_iterations + 1):
config = try_reload_config(config)
set_language(config.language)
_refresh_inputs(config, input_contents)
try:
for i in range(1, config.max_iterations + 1):
config = try_reload_config(config)
set_language(config.language)
_refresh_inputs(config, input_contents)
runtime_env = _build_runtime_inputs(config, input_contents, cwd)
logger.info("=" * 50)
logger.info(" Iteration %d/%d", i, config.max_iterations)
logger.info("=" * 50)
logger.info("=" * 50)
logger.info(" Iteration %d/%d", i, config.max_iterations)
logger.info("=" * 50)
step_outputs, step_results, verdict = _run_steps(
config.pipeline, config, input_contents, feedback,
i, config.max_iterations, cwd, timeout, dry_run,
run_dir=run_dir, output_iter=i,
)
step_outputs, step_results, verdict = _run_steps(
config.pipeline, config, input_contents, feedback,
i, config.max_iterations, cwd, timeout, dry_run,
run_dir=run_dir, output_iter=i,
worktree_path=worktree_path,
runtime_env=runtime_env,
)
iter_result = IterationResult(
iteration=i,
step_results=step_results,
step_outputs=step_outputs,
verdict=verdict,
)
warning = _detect_repeated_aggregate(
config.pipeline, step_outputs, aggregate_history, iteration=i,
)
if warning:
iter_result.repeated_aggregate_warning = warning
aggregate_warnings.append(warning)
logger.warning(" %s", warning)
# Intermediate commit so next iteration's diff only shows new changes
if worktree_path is not None:
_commit_iteration(worktree_path, config.preset_name, i, verdict)
iter_result.feedback = _collect_feedback(config.pipeline, step_outputs)
feedback = iter_result.feedback or feedback
all_feedbacks.append(feedback)
iter_result = IterationResult(
iteration=i,
step_results=step_results,
step_outputs=step_outputs,
verdict=verdict,
)
warning = _detect_repeated_aggregate(
config.pipeline, step_outputs, aggregate_history, iteration=i,
)
if warning:
iter_result.repeated_aggregate_warning = warning
aggregate_warnings.append(warning)
logger.warning(" %s", warning)
# Extract tracker from verdict/review steps for next iteration
for step in config.pipeline:
if step.verdict or step.role == "review":
tracker = _extract_senior_tracker(
step_outputs.get(step.output_key, ""),
)
if tracker:
input_contents["previous_senior_tracker"] = tracker
iter_result.feedback = _collect_feedback(config.pipeline, step_outputs)
feedback = iter_result.feedback or feedback
all_feedbacks.append(feedback)
iterations.append(iter_result)
# ESCALATE check (highest priority)
if verdict == "ESCALATE":
final_verdict = "ESCALATE"
# Extract escalation details from verdict step outputs
# Extract tracker from verdict/review steps for next iteration
for step in config.pipeline:
if step.verdict:
esc = _extract_escalated_issues(
if step.verdict or step.role == "review":
tracker = _extract_senior_tracker(
step_outputs.get(step.output_key, ""),
)
if esc:
escalated_issues.append(esc)
iter_result.escalated_issues = esc
logger.info(" ESCALATE at iteration %d — stopping loop.", i)
break
if tracker:
input_contents["previous_senior_tracker"] = tracker
if verdict == "PASS":
final_verdict = "PASS"
if i >= config.min_iterations:
logger.info(" PASS at iteration %d (min=%d reached)!", i, config.min_iterations)
iterations.append(iter_result)
# ESCALATE check (highest priority)
if verdict == "ESCALATE":
final_verdict = "ESCALATE"
for step in config.pipeline:
if step.verdict:
esc = _extract_escalated_issues(
step_outputs.get(step.output_key, ""),
)
if esc:
escalated_issues.append(esc)
iter_result.escalated_issues = esc
logger.info(" ESCALATE at iteration %d — stopping loop.", i)
break
else:
logger.info(
" PASS at iteration %d, but min_iterations=%d — continuing",
i, config.min_iterations,
)
# Auto-escalate: no senior/aggregator + repeated FAIL
has_aggregator = config.seniors or any(
s.prompt_template == "default:aggregate-review" for s in config.pipeline
)
if (
verdict == "FAIL"
and not has_aggregator
and i >= 2
and _detect_auto_escalate(all_feedbacks[:-1], feedback)
):
final_verdict = "ESCALATE"
auto_msg = (
f"Auto-escalated: same issues detected across {i} iterations "
f"without resolution (no senior reviewer configured)."
if verdict == "PASS":
final_verdict = "PASS"
if i >= config.min_iterations:
logger.info(" PASS at iteration %d (min=%d reached)!", i, config.min_iterations)
break
else:
logger.info(
" PASS at iteration %d, but min_iterations=%d — continuing",
i, config.min_iterations,
)
# Auto-escalate: no senior/aggregator + repeated FAIL
has_aggregator = config.seniors or any(
s.prompt_template == "default:aggregate-review" for s in config.pipeline
)
escalated_issues.append(auto_msg)
iter_result.escalated_issues = auto_msg
logger.info(" AUTO-ESCALATE at iteration %d", i)
break
if (
verdict == "FAIL"
and not has_aggregator
and i >= 2
and _detect_auto_escalate(all_feedbacks[:-1], feedback)
):
final_verdict = "ESCALATE"
auto_msg = (
f"Auto-escalated: same issues detected across {i} iterations "
f"without resolution (no senior reviewer configured)."
)
escalated_issues.append(auto_msg)
iter_result.escalated_issues = auto_msg
logger.info(" AUTO-ESCALATE at iteration %d", i)
break
if dry_run:
logger.info(" (dry-run: stopping after iteration 1)")
break
if dry_run:
logger.info(" (dry-run: stopping after iteration 1)")
break
finally:
agentic_branch: str | None = None
if worktree_path is not None and agentic_branch_name is not None:
agentic_branch = _finalize_worktree(
cwd, worktree_path, agentic_branch_name,
config.preset_name, final_verdict,
)
total_duration = time.monotonic() - start_time
@@ -174,6 +303,7 @@ def _run_simple_pipeline(
run_dir=run_dir,
repeated_aggregate_warnings=aggregate_warnings,
escalated_issues=escalated_issues,
agentic_branch=agentic_branch,
)
if not dry_run:
@@ -195,6 +325,16 @@ def _run_phased_pipeline(
set_language(config.language)
input_contents = _load_inputs(config)
runtime_env = _build_runtime_inputs(config, input_contents, cwd)
# Setup shared worktree for agentic mode
all_phase_steps = [s for p in config.phases for s in p.steps]
worktree_path: Path | None = None
agentic_branch_name: str | None = None
if not dry_run and _has_agentic_steps(config, all_phase_steps):
worktree_path, agentic_branch_name = _setup_worktree(
cwd, run_dir, config.preset_name,
)
iterations: list[IterationResult] = []
feedback = "(no feedback — first iteration)"
@@ -207,152 +347,171 @@ def _run_phased_pipeline(
all_feedbacks: list[str] = []
escalated = False
for phase_idx, phase in enumerate(config.phases):
if escalated:
break
try:
for phase_idx, phase in enumerate(config.phases):
if escalated:
break
logger.info("=" * 60)
logger.info(
" Phase: %s (max_iter=%d, consecutive_pass=%d)",
phase.name, phase.max_iterations, phase.consecutive_pass,
)
logger.info("=" * 60)
consecutive_passes = 0
phase_converged = False
for pi in range(1, phase.max_iterations + 1):
global_iter += 1
config = try_reload_config(config)
set_language(config.language)
_refresh_inputs(config, input_contents)
logger.info("-" * 50)
logger.info("=" * 60)
logger.info(
" [%s] Iteration %d/%d (global: v%d)",
phase.name, pi, phase.max_iterations, global_iter,
" Phase: %s (max_iter=%d, consecutive_pass=%d)",
phase.name, phase.max_iterations, phase.consecutive_pass,
)
logger.info("-" * 50)
logger.info("=" * 60)
step_outputs, step_results, verdict = _run_steps(
phase.steps, config, input_contents, feedback,
pi, phase.max_iterations, cwd, timeout, dry_run,
run_dir=run_dir, output_iter=global_iter, phase_name=phase.name,
)
consecutive_passes = 0
phase_converged = False
iter_result = IterationResult(
iteration=global_iter,
step_results=step_results,
step_outputs=step_outputs,
verdict=verdict,
phase_name=phase.name,
)
phase_history = aggregate_history_by_phase.setdefault(phase.name, {})
warning = _detect_repeated_aggregate(
phase.steps, step_outputs, phase_history, iteration=global_iter,
phase_name=phase.name,
)
if warning:
iter_result.repeated_aggregate_warning = warning
aggregate_warnings.append(warning)
logger.warning(" %s", warning)
for pi in range(1, phase.max_iterations + 1):
global_iter += 1
iter_result.feedback = _collect_feedback(phase.steps, step_outputs)
feedback = iter_result.feedback or feedback
all_feedbacks.append(feedback)
config = try_reload_config(config)
set_language(config.language)
_refresh_inputs(config, input_contents)
runtime_env = _build_runtime_inputs(config, input_contents, cwd)
# Extract tracker from verdict/review steps
for step in phase.steps:
if step.verdict or step.role == "review":
tracker = _extract_senior_tracker(
step_outputs.get(step.output_key, ""),
logger.info("-" * 50)
logger.info(
" [%s] Iteration %d/%d (global: v%d)",
phase.name, pi, phase.max_iterations, global_iter,
)
logger.info("-" * 50)
step_outputs, step_results, verdict = _run_steps(
phase.steps, config, input_contents, feedback,
pi, phase.max_iterations, cwd, timeout, dry_run,
run_dir=run_dir, output_iter=global_iter, phase_name=phase.name,
worktree_path=worktree_path,
runtime_env=runtime_env,
)
# Intermediate commit so next iteration's diff only shows new changes
if worktree_path is not None:
_commit_iteration(
worktree_path, f"{config.preset_name}/{phase.name}",
global_iter, verdict,
)
if tracker:
input_contents["previous_senior_tracker"] = tracker
iterations.append(iter_result)
iter_result = IterationResult(
iteration=global_iter,
step_results=step_results,
step_outputs=step_outputs,
verdict=verdict,
phase_name=phase.name,
)
phase_history = aggregate_history_by_phase.setdefault(phase.name, {})
warning = _detect_repeated_aggregate(
phase.steps, step_outputs, phase_history, iteration=global_iter,
phase_name=phase.name,
)
if warning:
iter_result.repeated_aggregate_warning = warning
aggregate_warnings.append(warning)
logger.warning(" %s", warning)
# ESCALATE check
if verdict == "ESCALATE":
final_verdict = "ESCALATE"
iter_result.feedback = _collect_feedback(phase.steps, step_outputs)
feedback = iter_result.feedback or feedback
all_feedbacks.append(feedback)
# Extract tracker from verdict/review steps
for step in phase.steps:
if step.verdict:
esc = _extract_escalated_issues(
if step.verdict or step.role == "review":
tracker = _extract_senior_tracker(
step_outputs.get(step.output_key, ""),
)
if esc:
escalated_issues.append(esc)
iter_result.escalated_issues = esc
logger.info(
" [%s] ESCALATE at iteration %d — stopping.",
phase.name, pi,
)
escalated = True
break
if tracker:
input_contents["previous_senior_tracker"] = tracker
if verdict is None:
logger.info(
" [%s] completed (no verdict step; single-pass phase)",
phase.name,
)
phase_converged = True
break
iterations.append(iter_result)
if verdict == "PASS":
consecutive_passes += 1
logger.info(
" [%s] PASS (%d/%d consecutive)",
phase.name, consecutive_passes, phase.consecutive_pass,
)
if consecutive_passes >= phase.consecutive_pass:
# ESCALATE check
if verdict == "ESCALATE":
final_verdict = "ESCALATE"
for step in phase.steps:
if step.verdict:
esc = _extract_escalated_issues(
step_outputs.get(step.output_key, ""),
)
if esc:
escalated_issues.append(esc)
iter_result.escalated_issues = esc
logger.info(
" [%s] Converged! %d consecutive PASSes.",
phase.name, phase.consecutive_pass,
" [%s] ESCALATE at iteration %d — stopping.",
phase.name, pi,
)
escalated = True
break
if verdict is None:
logger.info(
" [%s] completed (no verdict step; single-pass phase)",
phase.name,
)
phase_converged = True
break
else:
consecutive_passes = 0
# Auto-escalate in phased pipeline
has_aggregator = config.seniors or any(
s.prompt_template == "default:aggregate-review" for s in phase.steps
)
if (
verdict == "FAIL"
and not has_aggregator
and pi >= 2
and _detect_auto_escalate(all_feedbacks[:-1], feedback)
):
final_verdict = "ESCALATE"
auto_msg = (
f"Auto-escalated: same issues detected across {pi} iterations "
f"in phase '{phase.name}' without resolution."
if verdict == "PASS":
consecutive_passes += 1
logger.info(
" [%s] PASS (%d/%d consecutive)",
phase.name, consecutive_passes, phase.consecutive_pass,
)
if consecutive_passes >= phase.consecutive_pass:
logger.info(
" [%s] Converged! %d consecutive PASSes.",
phase.name, phase.consecutive_pass,
)
phase_converged = True
break
else:
consecutive_passes = 0
# Auto-escalate in phased pipeline
has_aggregator = config.seniors or any(
s.prompt_template == "default:aggregate-review" for s in phase.steps
)
escalated_issues.append(auto_msg)
iter_result.escalated_issues = auto_msg
logger.info(" [%s] AUTO-ESCALATE at iteration %d", phase.name, pi)
escalated = True
if (
verdict == "FAIL"
and not has_aggregator
and pi >= 2
and _detect_auto_escalate(all_feedbacks[:-1], feedback)
):
final_verdict = "ESCALATE"
auto_msg = (
f"Auto-escalated: same issues detected across {pi} iterations "
f"in phase '{phase.name}' without resolution."
)
escalated_issues.append(auto_msg)
iter_result.escalated_issues = auto_msg
logger.info(" [%s] AUTO-ESCALATE at iteration %d", phase.name, pi)
escalated = True
break
if dry_run:
break
if escalated:
break
if dry_run:
break
if phase_converged:
logger.info(" Phase '%s' completed: CONVERGED", phase.name)
else:
logger.info(
" Phase '%s' completed: max iterations (%d) reached",
phase.name, phase.max_iterations,
)
if escalated:
break
if phase_idx == len(config.phases) - 1:
final_verdict = "PASS" if phase_converged else "MAX_ITERATIONS_REACHED"
if phase_converged:
logger.info(" Phase '%s' completed: CONVERGED", phase.name)
else:
logger.info(
" Phase '%s' completed: max iterations (%d) reached",
phase.name, phase.max_iterations,
finally:
agentic_branch: str | None = None
if worktree_path is not None and agentic_branch_name is not None:
agentic_branch = _finalize_worktree(
cwd, worktree_path, agentic_branch_name,
config.preset_name, final_verdict,
)
if phase_idx == len(config.phases) - 1:
final_verdict = "PASS" if phase_converged else "MAX_ITERATIONS_REACHED"
total_duration = time.monotonic() - start_time
pipeline_result = PipelineResult(
@@ -362,6 +521,7 @@ def _run_phased_pipeline(
run_dir=run_dir,
repeated_aggregate_warnings=aggregate_warnings,
escalated_issues=escalated_issues,
agentic_branch=agentic_branch,
)
if not dry_run:
@@ -463,6 +623,8 @@ def _run_steps(
run_dir: Path,
output_iter: int,
phase_name: str | None = None,
worktree_path: Path | None = None,
runtime_env: dict[str, str] | None = None,
) -> tuple[dict[str, str], dict[str, AgentResult], str | None]:
"""Execute all steps in one iteration, parallelizing where possible."""
step_outputs: dict[str, str] = {}
@@ -473,21 +635,23 @@ def _run_steps(
for batch in batches:
if len(batch) == 1:
# Single step — run directly
step = batch[0]
_execute_step(
step, config, input_contents, feedback,
iteration, max_iterations, cwd, timeout, dry_run,
step_outputs, step_results,
run_dir=run_dir, output_iter=output_iter, phase_name=phase_name,
run_dir=run_dir, output_iter=output_iter,
phase_name=phase_name, worktree_path=worktree_path,
runtime_env=runtime_env,
)
else:
# Parallel batch — run with ThreadPoolExecutor
_execute_parallel_batch(
batch, config, input_contents, feedback,
iteration, max_iterations, cwd, timeout, dry_run,
step_outputs, step_results,
run_dir=run_dir, output_iter=output_iter, phase_name=phase_name,
run_dir=run_dir, output_iter=output_iter,
phase_name=phase_name, worktree_path=worktree_path,
runtime_env=runtime_env,
)
# Extract verdict from all verdict steps (ALL must PASS; ESCALATE wins over all)
@@ -506,6 +670,25 @@ def _run_steps(
return step_outputs, step_results, verdict
def _invoke_agentic(
agent_config: AgentConfig,
prompt: str,
step_name: str,
*,
worktree_path: Path,
env: dict[str, str] | None = None,
timeout: int | None = None,
quiet: bool = False,
) -> AgentResult:
"""Run an agent in agentic mode using an existing worktree."""
return invoke_agent_agentic(
agent_config, prompt, step_name,
worktree_path=worktree_path,
env=env,
timeout=timeout, quiet=quiet,
)
def _execute_step(
step: StepConfig,
config: PipelineConfig,
@@ -523,6 +706,8 @@ def _execute_step(
output_iter: int,
phase_name: str | None = None,
quiet: bool = False,
worktree_path: Path | None = None,
runtime_env: dict[str, str] | None = None,
) -> None:
"""Execute a single step, updating step_outputs and step_results in place."""
if not quiet:
@@ -542,6 +727,7 @@ def _execute_step(
# 4. Render prompt
prompt = render_template(template, context)
prompt = _augment_prompt_with_runtime_context(prompt, context)
# 5. Dry run: print and skip
if dry_run:
@@ -555,10 +741,21 @@ def _execute_step(
# 6. Invoke agent
agent_config = config.agents[step.agent]
try:
result = invoke_agent(
agent_config, prompt, step.name,
cwd=cwd, timeout=timeout, quiet=quiet,
)
if agent_config.agentic and worktree_path:
result = _invoke_agentic(
agent_config, prompt, step.name,
worktree_path=worktree_path,
env=runtime_env,
timeout=timeout, quiet=quiet,
)
else:
# When worktree exists, run non-agentic agents (reviewers) in
# the worktree too so they can inspect the modified files.
effective_cwd = worktree_path if worktree_path else cwd
result = invoke_agent(
agent_config, prompt, step.name,
cwd=effective_cwd, env=runtime_env, timeout=timeout, quiet=quiet,
)
except subprocess.TimeoutExpired as e:
stdout = (e.stdout or b"") if isinstance(e.stdout, bytes) else (e.stdout or "")
stderr = (e.stderr or b"") if isinstance(e.stderr, bytes) else (e.stderr or "")
@@ -625,6 +822,8 @@ def _execute_parallel_batch(
run_dir: Path,
output_iter: int,
phase_name: str | None = None,
worktree_path: Path | None = None,
runtime_env: dict[str, str] | None = None,
) -> None:
"""Execute multiple steps in parallel using threads."""
agent_names = ", ".join(s.agent for s in batch)
@@ -640,6 +839,26 @@ def _execute_parallel_batch(
)
return
# Agentic steps cannot run in parallel (they share a worktree)
agentic_in_batch = [
s for s in batch
if config.agents.get(s.agent, AgentConfig(name="", command="")).agentic
]
if len(agentic_in_batch) > 1:
logger.warning(
" [parallel] %d agentic steps cannot run concurrently — running sequentially",
len(agentic_in_batch),
)
for step in batch:
_execute_step(
step, config, input_contents, feedback,
iteration, max_iterations, cwd, timeout, dry_run,
step_outputs, step_results,
run_dir=run_dir, output_iter=output_iter,
phase_name=phase_name, worktree_path=worktree_path,
)
return
# Snapshot context before parallel execution (all steps see same state)
context_snapshot = dict(input_contents)
context_snapshot.update(step_outputs)
@@ -666,12 +885,22 @@ def _execute_parallel_batch(
if step.context_override:
context = _apply_context_override(context, step.context_override)
prompt = render_template(template, context)
prompt = _augment_prompt_with_runtime_context(prompt, context)
agent_config = config.agents[step.agent]
result = invoke_agent(
agent_config, prompt, step.name,
cwd=cwd, timeout=timeout, quiet=True,
)
if agent_config.agentic and worktree_path:
result = _invoke_agentic(
agent_config, prompt, step.name,
worktree_path=worktree_path,
env=runtime_env,
timeout=timeout, quiet=True,
)
else:
effective_cwd = worktree_path if worktree_path else cwd
result = invoke_agent(
agent_config, prompt, step.name,
cwd=effective_cwd, env=runtime_env, timeout=timeout, quiet=True,
)
return step.output_key, result.output, result
with ThreadPoolExecutor(max_workers=len(batch)) as executor:
@@ -765,6 +994,35 @@ def _build_context(
return context
def _build_runtime_inputs(
config: PipelineConfig,
input_contents: dict[str, str],
cwd: Path,
) -> dict[str, str]:
"""Load runtime env and expose safe execution hints to prompts."""
env, loaded_files, loaded_values = build_runtime_environment(config.execution, cwd)
input_contents["execution_policy"] = build_execution_policy(config.execution)
input_contents["environment_context"] = summarize_environment(
config.execution, loaded_files, env, loaded_values,
)
return env
def _augment_prompt_with_runtime_context(
prompt: str,
context: dict[str, str],
) -> str:
"""Append execution/env guidance without requiring every template to include placeholders."""
extras: list[str] = []
if context.get("execution_policy"):
extras.append("## Execution Policy\n" + context["execution_policy"])
if context.get("environment_context"):
extras.append("## Environment Context\n" + context["environment_context"])
if not extras:
return prompt
return prompt.rstrip() + "\n\n" + "\n\n".join(extras) + "\n"
def _apply_context_override(
context: dict[str, str],
overrides: dict[str, str],