// Files
// dev-puppeteer/packages/run-engine/src/engine.test.ts
// 2026-05-13 21:44:58 +09:00
//
// 1831 lines
// 65 KiB
// TypeScript

import { execFileSync } from "node:child_process";
import { randomUUID } from "node:crypto";
import {
existsSync,
mkdirSync,
mkdtempSync,
readFileSync,
realpathSync,
rmSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join, resolve } from "node:path";
import { and, eq, inArray } from "drizzle-orm";
import { afterEach, describe, expect, it } from "vitest";
import { DevflowError, loadPersonaFiles, loadTemplateFiles, validateArtifact } from "@devflow/core";
import {
type DbClient,
agentPersonas,
approvalDecisions,
approvalRequests,
artifacts,
createDbClient,
runBindings,
runEvents,
runPhases,
runs,
tuiSessions,
workflowTemplates,
} from "@devflow/db";
import {
FakeSessionAdapter,
type SessionAdapter,
type SessionHandle,
SessionManager,
type SessionRuntime,
type TranscriptChunk,
} from "@devflow/session";
import { DbRunEngine, sweepM4ProcessRestart } from "./engine.js";
/** Connection string used when the DATABASE_URL environment variable is not set. */
const fallbackDatabaseUrl = "postgres://devflow:devflow@127.0.0.1:55432/devflow";
/** Postgres connection string for these tests: DATABASE_URL if set, otherwise the fallback. */
const databaseUrl = process.env.DATABASE_URL ?? fallbackDatabaseUrl;
/**
 * Builds a SessionManager from a database handle and a session adapter.
 *
 * @param db - Database handle taken from a DbClient.
 * @param adapter - Session adapter handed to the manager.
 * @returns A new SessionManager constructed from `{ db, adapter }`.
 */
function sessionRuntime(db: DbClient["db"], adapter: SessionAdapter) {
  const managerOptions = { db, adapter };
  return new SessionManager(managerOptions);
}
/**
 * Creates a throwaway git repository containing a single commit on `main`.
 *
 * The repository lives in a fresh temp directory (prefix `devflow-engine-repo-`) and holds
 * one committed file, `README.md`. The caller owns the directory and must remove it.
 *
 * @returns The real (symlink-resolved) path of the new repository.
 * @throws Whatever `execFileSync`/`writeFileSync` throws when a step fails; the temp
 *   directory is removed before the error is rethrown so failed setups do not leak.
 */
function createGitRepo(): string {
  const repoPath = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-repo-")));
  // Every git call runs silently inside the new repository.
  const git = (args: string[]): void => {
    execFileSync("git", args, { cwd: repoPath, stdio: "ignore" });
  };
  try {
    git(["init", "-b", "main"]);
    writeFileSync(join(repoPath, "README.md"), "# Engine fixture\n");
    git(["add", "README.md"]);
    git([
      "-c",
      "user.name=Devflow Test",
      "-c",
      "user.email=devflow@example.test",
      // Do not inherit a global commit.gpgsign=true: signing needs a key/agent the
      // test environment may not have, and stdio is ignored so a prompt would stall.
      "-c",
      "commit.gpgsign=false",
      "commit",
      "-m",
      "initial",
    ]);
  } catch (error) {
    // The caller only registers the path for cleanup after a successful return.
    rmSync(repoPath, { recursive: true, force: true });
    throw error;
  }
  return repoPath;
}
/**
 * Fake adapter that marks the run as `paused` (from `executing`) in the database right
 * after the first prompt has been accepted by the base adapter. Later prompts are
 * forwarded without touching the run row.
 */
class PausesAfterPromptAcceptedFakeAdapter extends FakeSessionAdapter {
  /** True once the pause update has been issued. */
  private paused = false;

  constructor(private readonly db: DbClient["db"]) {
    super({ writeDelayMs: 0 });
  }

  /**
   * Forwards the prompt to the base adapter, then (first call only) writes the pause.
   *
   * @returns The base adapter's result, unchanged.
   */
  override async sendPrompt(
    handle: Parameters<FakeSessionAdapter["sendPrompt"]>[0],
    envelope: Parameters<FakeSessionAdapter["sendPrompt"]>[1],
  ): Promise<{ promptId: string }> {
    const accepted = await super.sendPrompt(handle, envelope);
    if (this.paused) {
      return accepted;
    }
    // Flip the flag before awaiting the update so the pause is only written once.
    this.paused = true;
    await this.db
      .update(runs)
      .set({ state: "paused", pausedFromState: "executing", updatedAt: new Date() })
      .where(eq(runs.id, envelope.runId));
    return accepted;
  }
}
/** Fake adapter that counts dispose() invocations before delegating to the base adapter. */
class DisposeCountingFakeAdapter extends FakeSessionAdapter {
  /** Number of dispose() calls made so far; incremented before the base dispose runs. */
  disposeCalls = 0;

  override async dispose(handle: Parameters<FakeSessionAdapter["dispose"]>[0]): Promise<void> {
    this.disposeCalls++;
    await super.dispose(handle);
  }
}
/** Fake adapter whose dispose() always rejects with a recoverable DevflowError. */
class DisposeFailsFakeAdapter extends FakeSessionAdapter {
  /**
   * Never delegates to the base adapter.
   *
   * @throws DevflowError with code `pane_briefly_unresponsive`; the recovery hint carries
   *   the session id of the handle.
   */
  override async dispose(handle: Parameters<FakeSessionAdapter["dispose"]>[0]): Promise<void> {
    const failure = new DevflowError("dispose failed", {
      class: "recoverable",
      code: "pane_briefly_unresponsive",
      recoveryHint: `session=${handle.sessionId}`,
    });
    throw failure;
  }
}
/**
 * Fake adapter that logs the order of capture() and dispose() calls and can be told to
 * fail transcript capture.
 */
class CaptureOrderingFakeAdapter extends FakeSessionAdapter {
  /** "capture" / "dispose" markers, appended as each operation begins. */
  events: string[] = [];
  /** When true, capture() throws after recording its marker. */
  failCapture = false;

  /**
   * Records "capture", then either streams the base adapter's chunks or throws.
   *
   * @throws DevflowError (`pane_briefly_unresponsive`) when `failCapture` is set.
   */
  override async *capture(
    handle: Parameters<FakeSessionAdapter["capture"]>[0],
    fromSeq: bigint,
  ): AsyncIterable<TranscriptChunk> {
    this.events.push("capture");
    if (!this.failCapture) {
      yield* super.capture(handle, fromSeq);
      return;
    }
    throw new DevflowError("transcript capture failed", {
      class: "recoverable",
      code: "pane_briefly_unresponsive",
    });
  }

  /** Records "dispose", then delegates to the base adapter. */
  override async dispose(handle: Parameters<FakeSessionAdapter["dispose"]>[0]): Promise<void> {
    this.events.push("dispose");
    await super.dispose(handle);
  }
}
/**
 * Fake adapter that remembers which sessions were disposed and makes any later capture()
 * for those sessions fail. Also logs the order of capture/dispose calls.
 */
class CaptureFailsAfterDisposeFakeAdapter extends FakeSessionAdapter {
  /** Session ids for which dispose() has been called. */
  readonly disposedSessionIds = new Set<string>();
  /** "capture" / "dispose" markers, appended as each operation begins. */
  readonly events: string[] = [];

  /**
   * Records "capture"; streams the base adapter's chunks unless the session was disposed.
   *
   * @throws DevflowError (`pane_briefly_unresponsive`) when the handle's session id is in
   *   `disposedSessionIds`.
   */
  override async *capture(
    handle: Parameters<FakeSessionAdapter["capture"]>[0],
    fromSeq: bigint,
  ): AsyncIterable<TranscriptChunk> {
    this.events.push("capture");
    const alreadyDisposed = this.disposedSessionIds.has(handle.sessionId);
    if (!alreadyDisposed) {
      yield* super.capture(handle, fromSeq);
      return;
    }
    throw new DevflowError("tmux session already disposed", {
      class: "recoverable",
      code: "pane_briefly_unresponsive",
      recoveryHint: `session=${handle.sessionId}`,
    });
  }

  /** Records "dispose", marks the session as disposed, then delegates to the base adapter. */
  override async dispose(handle: Parameters<FakeSessionAdapter["dispose"]>[0]): Promise<void> {
    this.events.push("dispose");
    this.disposedSessionIds.add(handle.sessionId);
    await super.dispose(handle);
  }
}
/**
 * SessionRuntime test double: every operation is forwarded to an internal
 * FakeSessionAdapter, and the handles passed to capture() and dispose() are recorded.
 */
class TerminalHandleRecordingRuntime implements SessionRuntime {
  /** Backing fake adapter that performs the actual work. */
  readonly adapter = new FakeSessionAdapter({ writeDelayMs: 0 });
  /** Handles recorded when a capture() iteration begins. */
  readonly captureHandles: SessionHandle[] = [];
  /** Handles recorded on each dispose() call. */
  readonly disposeHandles: SessionHandle[] = [];

  /** Returns the given promise unchanged. */
  trackOperation<T>(operation: Promise<T>): Promise<T> {
    return operation;
  }

  /** Forwards to the fake adapter's start(). */
  start(...startArgs: Parameters<SessionRuntime["start"]>): ReturnType<SessionRuntime["start"]> {
    return this.adapter.start(...startArgs);
  }

  /** Forwards to the fake adapter's sendPrompt(). */
  sendPrompt(
    ...promptArgs: Parameters<SessionRuntime["sendPrompt"]>
  ): ReturnType<SessionRuntime["sendPrompt"]> {
    return this.adapter.sendPrompt(...promptArgs);
  }

  /** Forwards to the fake adapter's probe(). */
  probe(...probeArgs: Parameters<SessionRuntime["probe"]>): ReturnType<SessionRuntime["probe"]> {
    return this.adapter.probe(...probeArgs);
  }

  /** Forwards to the fake adapter's resume(). */
  resume(
    ...resumeArgs: Parameters<SessionRuntime["resume"]>
  ): ReturnType<SessionRuntime["resume"]> {
    return this.adapter.resume(...resumeArgs);
  }

  /** Forwards to the fake adapter's rebootstrap(). */
  rebootstrap(
    ...rebootstrapArgs: Parameters<SessionRuntime["rebootstrap"]>
  ): ReturnType<SessionRuntime["rebootstrap"]> {
    return this.adapter.rebootstrap(...rebootstrapArgs);
  }

  /** Records the handle, then streams the fake adapter's transcript chunks. */
  async *capture(handle: SessionHandle, fromSeq: bigint): ReturnType<SessionRuntime["capture"]> {
    this.captureHandles.push(handle);
    yield* this.adapter.capture(handle, fromSeq);
  }

  /** Records the handle, then disposes it through the fake adapter. */
  async dispose(handle: SessionHandle): Promise<void> {
    this.disposeHandles.push(handle);
    await this.adapter.dispose(handle);
  }
}
describe("DbRunEngine", () => {
let client: DbClient | undefined;
const runIds: string[] = [];
const tempRoots: string[] = [];
// Per-test cleanup: deletes the approval decisions, approval requests and runs created by
// the test, closes the DB client and removes every registered temp directory.
// Each stage runs in a `finally` so a failing delete can no longer leave the client open,
// temp directories on disk, or stale run ids queued for the next test.
afterEach(async () => {
  // Detach the shared state up front; the next test always starts clean.
  const activeClient = client;
  client = undefined;
  const trackedRunIds = runIds.splice(0);
  try {
    if (activeClient !== undefined) {
      try {
        if (trackedRunIds.length > 0) {
          const requests = await activeClient.db
            .select({ id: approvalRequests.id })
            .from(approvalRequests)
            .where(inArray(approvalRequests.runId, trackedRunIds));
          // Order is decisions -> requests -> runs (presumably to satisfy foreign keys).
          if (requests.length > 0) {
            await activeClient.db.delete(approvalDecisions).where(
              inArray(
                approvalDecisions.approvalRequestId,
                requests.map((request) => request.id),
              ),
            );
          }
          await activeClient.db
            .delete(approvalRequests)
            .where(inArray(approvalRequests.runId, trackedRunIds));
          await activeClient.db.delete(runs).where(inArray(runs.id, trackedRunIds));
        }
      } finally {
        await activeClient.close();
      }
    }
  } finally {
    for (const root of tempRoots.splice(0)) {
      rmSync(root, { recursive: true, force: true });
    }
  }
});
// End-to-end flow: start a run, approve the spec gate and the phase-plan gate, then check
// the completed run's phases, bindings, events and final report files.
it("runs development@1 through a spec approval gate and writes final reports", async () => {
// Arrange: seeded registry, a fresh workspace dir and a git fixture repo (both queued for cleanup).
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
// Start the run; the assertions below expect it to stop at the spec approval gate.
const { runId } = await engine.startRun({
requirementsMd: "Build the requested change using the fake development workflow.",
repoPath,
baseBranch: "main",
scenarios: {
spec: "ok",
phase_plan: "ok",
},
});
runIds.push(runId);
const awaiting = await engine.getStatus(runId);
expect(awaiting.run.state).toBe("awaiting_approval");
expect(awaiting.run.repoPath).toBe(repoPath);
expect(awaiting.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([
["spec", "awaiting_approval"],
["phase_plan", "pending"],
]);
expect(awaiting.approvals).toMatchObject([
{
gateKey: "spec_approved",
state: "pending",
},
]);
// Exactly one valid dev/spec@1 artifact is expected at this point.
const specArtifacts = await client.db
.select({ schemaId: artifacts.schemaId, valid: artifacts.valid })
.from(artifacts)
.where(eq(artifacts.runId, runId));
expect(specArtifacts).toEqual([{ schemaId: "dev/spec@1", valid: true }]);
const approvalId = awaiting.approvals[0]?.id;
expect(approvalId).toBeDefined();
if (approvalId === undefined) {
throw new Error("approval id missing");
}
// The same approve signal is sent twice with one client token; neither call may reject.
const clientToken = randomUUID();
await engine.signalApproval(runId, approvalId, "approve", clientToken, "approved");
await engine.signalApproval(runId, approvalId, "approve", clientToken, "approved");
const awaitingPlanApproval = await engine.getStatus(runId);
expect(awaitingPlanApproval.run.state).toBe("awaiting_approval");
expect(awaitingPlanApproval.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([
["spec", "completed"],
["phase_plan", "awaiting_approval"],
]);
// Approve the phase-plan gate.
const phasePlanApproval = pendingApproval(awaitingPlanApproval, "phase_plan_approved");
const planClientToken = randomUUID();
await engine.signalApproval(
runId,
phasePlanApproval.id,
"approve",
planClientToken,
"plan approved",
);
// With both gates approved the run is expected to be completed, including an `implement` phase.
const completed = await engine.getStatus(runId);
expect(completed.run.state).toBe("completed");
expect(completed.run.finalReportPath).toMatch(/\.report\.md$/);
expect(completed.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([
["spec", "completed"],
["phase_plan", "completed"],
["implement", "completed"],
]);
expect(completed.approvals).toMatchObject([
{
gateKey: "spec_approved",
state: "approved",
},
{
gateKey: "phase_plan_approved",
state: "approved",
},
]);
// Load the run's bindings, its events ordered by seq, and the persisted run row in parallel.
// Note: `bindingCount` holds the binding rows themselves, not a count.
const [bindingCount, eventRows, finalRun] = await Promise.all([
client.db.select().from(runBindings).where(eq(runBindings.runId, runId)),
client.db
.select({ seq: runEvents.seq, type: runEvents.type })
.from(runEvents)
.where(eq(runEvents.runId, runId))
.orderBy(runEvents.seq),
client.db
.select({ finalReportPath: runs.finalReportPath, worktreeRoot: runs.worktreeRoot })
.from(runs)
.where(eq(runs.id, runId))
.limit(1),
]);
expect(bindingCount.map((binding) => binding.roleId).sort()).toEqual([
"phase_planner",
"spec_writer",
]);
// Event seq values must be 1..n (as bigint) with no gaps.
expect(eventRows.map((event) => event.seq)).toEqual(
eventRows.map((_, index) => BigInt(index + 1)),
);
expect(eventRows.map((event) => event.type)).toContain("approval.resolved");
expect(eventRows.filter((event) => event.type === "session.idle")).toHaveLength(3);
expect(eventRows.at(-1)?.type).toBe("run.completed");
const finalReportPath = finalRun[0]?.finalReportPath;
expect(finalReportPath).toBe(completed.run.finalReportPath);
expect(finalReportPath).not.toBeNull();
if (finalReportPath === null || finalReportPath === undefined) {
throw new Error("final report path missing");
}
expect(existsSync(finalReportPath)).toBe(true);
// The JSON report is read from the markdown report's path with a .report.json suffix.
const reportJsonPath = finalReportPath.replace(/\.report\.md$/, ".report.json");
const report = JSON.parse(readFileSync(reportJsonPath, "utf8")) as unknown;
expect(validateArtifact("common/final-report@1", report)).toEqual({ ok: true });
expect(report).toMatchObject({
runId,
status: "completed",
approvals: [
{ gateKey: "spec_approved", state: "approved" },
{ gateKey: "phase_plan_approved", state: "approved" },
],
unresolved: [],
});
// The worktree is expected at <workspaceRoot>/<runId>/main and to contain the fixture README.
expect(finalRun[0]?.worktreeRoot).toBe(resolve(workspaceRoot, runId, "main"));
expect(readFileSync(join(finalRun[0]?.worktreeRoot ?? "", "README.md"), "utf8")).toContain(
"Engine fixture",
);
// Clear finalReportPath, then replay the already-applied plan approval (same token);
// the report path is expected to be restored and the file to exist.
await client.db
.update(runs)
.set({ finalReportPath: null, updatedAt: new Date() })
.where(eq(runs.id, runId));
await engine.signalApproval(
runId,
phasePlanApproval.id,
"approve",
planClientToken,
"plan approved",
);
const repairedReport = await engine.getStatus(runId);
expect(repairedReport.run.finalReportPath).toMatch(/\.report\.md$/);
expect(existsSync(repairedReport.run.finalReportPath ?? "")).toBe(true);
});
// Uses the `skip-only` phase-plan scenario and checks that the skipped `documentation-note`
// phase records attempt 1 both on the phase row and on the `phase.skipped` event.
it("increments attempts before emitting skipped planned phases", async () => {
// Arrange: seeded registry, a fresh workspace dir and a git fixture repo (both queued for cleanup).
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Plan a phase with no artifact so it is skipped.",
repoPath,
baseBranch: "main",
scenarios: {
phase_plan: "skip-only",
},
});
runIds.push(runId);
// Approve both gates in sequence, each with a fresh client token.
const specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved");
await engine.signalApproval(runId, specApproval.id, "approve", randomUUID());
const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved");
await engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID());
// The skipped phase row must already carry attempts = 1.
const phases = await client.db
.select({
attempts: runPhases.attempts,
phaseKey: runPhases.phaseKey,
state: runPhases.state,
})
.from(runPhases)
.where(eq(runPhases.runId, runId));
expect(phases.find((phase) => phase.phaseKey === "documentation-note")).toMatchObject({
attempts: 1,
state: "skipped",
});
// The phase.skipped event's idempotency key must end in ":1" and its payload report attempt 1.
const [skipEvent] = await client.db
.select({ idempotencyKey: runEvents.idempotencyKey, payload: runEvents.payload })
.from(runEvents)
.where(and(eq(runEvents.runId, runId), eq(runEvents.type, "phase.skipped")))
.limit(1);
expect(skipEvent?.idempotencyKey).toMatch(/:1$/);
expect(skipEvent?.payload).toMatchObject({
attempt: 1,
phaseKey: "documentation-note",
});
});
// A second startRun for the same repo path and base branch must be rejected while the
// first run is still active; the rejection carries the first run's id and state.
it("rejects duplicate active runs for the same canonical repo and base branch", async () => {
  client = createDbClient(databaseUrl);
  const db = client.db;
  await seedDevelopmentRegistry(db);

  const workspaceDir = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
  const fixtureRepo = createGitRepo();
  tempRoots.push(workspaceDir, fixtureRepo);

  const fakeAdapter = new FakeSessionAdapter({ writeDelayMs: 0 });
  const engine = new DbRunEngine({
    db,
    sessions: sessionRuntime(db, fakeAdapter),
    maxConcurrentRuns: 100,
    workspaceRoot: workspaceDir,
    wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
  });

  const firstRun = await engine.startRun({
    requirementsMd: "First active run.",
    repoPath: fixtureRepo,
    baseBranch: "main",
  });
  runIds.push(firstRun.runId);

  const duplicateStart = engine.startRun({
    requirementsMd: "Second active run for the same repo.",
    repoPath: fixtureRepo,
    baseBranch: "main",
  });
  // The recovery hint is a JSON string naming the run that blocks the new one.
  const expectedHint = JSON.stringify({
    currentRunId: firstRun.runId,
    currentState: "awaiting_approval",
  });
  await expect(duplicateStart).rejects.toMatchObject({
    code: "active_run_exists",
    recoveryHint: expectedHint,
  });
});
// validatePreparedRunInput must accept the exact input a run was prepared with and reject
// the same input once its scenarios differ.
it("validates a prepared run replay without accepting changed start inputs", async () => {
  client = createDbClient(databaseUrl);
  const db = client.db;
  await seedDevelopmentRegistry(db);

  const workspaceDir = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
  const fixtureRepo = createGitRepo();
  tempRoots.push(workspaceDir, fixtureRepo);

  const engine = new DbRunEngine({
    db,
    sessions: sessionRuntime(db, new FakeSessionAdapter({ writeDelayMs: 0 })),
    maxConcurrentRuns: 100,
    workspaceRoot: workspaceDir,
  });

  const runId = randomUUID();
  const preparedInput = {
    runId,
    requirementsMd: "Validate replayed Temporal start input.",
    repoPath: fixtureRepo,
    baseBranch: "main",
    scenarios: { spec: "ok" },
  };
  await engine.prepareRun(preparedInput);
  runIds.push(runId);

  // Unchanged input replays cleanly.
  const unchangedReplay = engine.validatePreparedRunInput(preparedInput);
  await expect(unchangedReplay).resolves.toBeUndefined();

  // Same run id with a different scenario is rejected.
  const changedInput = { ...preparedInput, scenarios: { spec: "timeout" } };
  const changedReplay = engine.validatePreparedRunInput(changedInput);
  await expect(changedReplay).rejects.toMatchObject({ code: "internal_state_corruption" });
});
// Replaces the prepared run's worktree with an empty directory and expects a second
// prepareRun with the same input to be rejected with `workspace_permissions`.
it("rejects prepared run replay when the persisted worktree path is only a partial directory", async () => {
// Arrange: seeded registry, a fresh workspace dir and a git fixture repo (both queued for cleanup).
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
maxConcurrentRuns: 100,
workspaceRoot,
});
const runId = randomUUID();
const input = {
runId,
requirementsMd: "Reject partial worktree replay.",
repoPath,
baseBranch: "main",
};
await engine.prepareRun(input);
runIds.push(runId);
// Look up the worktree path that prepareRun persisted on the run row.
const [run] = await client.db
.select({ worktreeRoot: runs.worktreeRoot })
.from(runs)
.where(eq(runs.id, runId));
expect(run).toBeDefined();
if (run === undefined) {
throw new Error("prepared run missing");
}
// Wipe the worktree and recreate it as an empty directory.
rmSync(run.worktreeRoot, { recursive: true, force: true });
mkdirSync(run.worktreeRoot, { recursive: true });
await expect(engine.prepareRun(input)).rejects.toMatchObject({
code: "workspace_permissions",
});
});
// Replaces the prepared run's worktree with a freshly initialised, unrelated git repository
// and expects a second prepareRun with the same input to be rejected with `workspace_permissions`.
it("rejects prepared run replay when the persisted worktree belongs to another repo", async () => {
// Arrange: seeded registry, a fresh workspace dir and a git fixture repo (both queued for cleanup).
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
maxConcurrentRuns: 100,
workspaceRoot,
});
const runId = randomUUID();
const input = {
runId,
requirementsMd: "Reject a replayed worktree that belongs to a different repo.",
repoPath,
baseBranch: "main",
};
await engine.prepareRun(input);
runIds.push(runId);
// Look up the worktree path that prepareRun persisted on the run row.
const [run] = await client.db
.select({ worktreeRoot: runs.worktreeRoot })
.from(runs)
.where(eq(runs.id, runId));
expect(run).toBeDefined();
if (run === undefined) {
throw new Error("prepared run missing");
}
// Swap the worktree for a standalone `git init` whose initial branch is devflow/<runId>/main.
rmSync(run.worktreeRoot, { recursive: true, force: true });
mkdirSync(run.worktreeRoot, { recursive: true });
execFileSync("git", ["init", "-b", `devflow/${runId}/main`], {
cwd: run.worktreeRoot,
stdio: "ignore",
});
await expect(engine.prepareRun(input)).rejects.toMatchObject({
code: "workspace_permissions",
});
});
// Inserts four `executing` run rows directly, then expects startRun on an engine configured
// with maxConcurrentRuns = 4 to be rejected with `max_concurrent_runs`.
it("enforces the configured maximum concurrent active runs", async () => {
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const candidateRepoPath = createGitRepo();
tempRoots.push(workspaceRoot, candidateRepoPath);
// The inserted run rows need the id and hash of the development@1 template.
const [template] = await client.db
.select({ hash: workflowTemplates.hash, id: workflowTemplates.id })
.from(workflowTemplates)
.where(and(eq(workflowTemplates.name, "development"), eq(workflowTemplates.version, 1)))
.limit(1);
if (template === undefined) {
throw new Error("development@1 template missing");
}
// Each synthetic run gets its own temp repo path, worktree dir and base branch;
// all of them are registered for cleanup.
for (let index = 0; index < 4; index += 1) {
const activeRunId = randomUUID();
const repoPath = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-active-repo-")));
const worktreeRoot = realpathSync(
mkdtempSync(join(tmpdir(), "devflow-engine-active-worktree-")),
);
tempRoots.push(repoPath, worktreeRoot);
runIds.push(activeRunId);
await client.db.insert(runs).values({
id: activeRunId,
templateId: template.id,
templateHash: template.hash,
state: "executing",
repoPath,
baseBranch: `branch-${index}`,
worktreeRoot,
});
}
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
maxConcurrentRuns: 4,
workspaceRoot,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
// The candidate run uses a different repo, so only the concurrency limit can reject it here.
await expect(
engine.startRun({
requirementsMd: "This run should exceed the concurrency limit.",
repoPath: candidateRepoPath,
baseBranch: "main",
}),
).rejects.toMatchObject({
code: "max_concurrent_runs",
});
});
// Pausing a run that is waiting on an approval must keep the gate pending: approve/reject
// signals are refused while paused, and resuming returns the run to `awaiting_approval`.
it("pauses and resumes an approval wait without resolving the gate", async () => {
  client = createDbClient(databaseUrl);
  const db = client.db;
  await seedDevelopmentRegistry(db);

  const workspaceDir = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
  const fixtureRepo = createGitRepo();
  tempRoots.push(workspaceDir, fixtureRepo);

  const engine = new DbRunEngine({
    db,
    sessions: sessionRuntime(db, new FakeSessionAdapter({ writeDelayMs: 0 })),
    workspaceRoot: workspaceDir,
    maxConcurrentRuns: 100,
    wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
  });
  const { runId } = await engine.startRun({
    requirementsMd: "Pause while waiting for approval.",
    repoPath: fixtureRepo,
    baseBranch: "main",
  });
  runIds.push(runId);

  // Reads the run state fresh from the engine each time it is called.
  const currentRunState = async () => (await engine.getStatus(runId)).run.state;

  await engine.pauseRun(runId);
  expect(await currentRunState()).toBe("paused");

  const approvalRows = await db
    .select({ id: approvalRequests.id, state: approvalRequests.state })
    .from(approvalRequests)
    .where(eq(approvalRequests.runId, runId));
  const gateRow = approvalRows[0];
  expect(gateRow).toMatchObject({ state: "pending" });
  if (gateRow === undefined) {
    throw new Error("pending approval missing");
  }

  // Both decisions are refused while the run is paused, each with a fresh client token.
  for (const action of ["approve", "reject"] as const) {
    await expect(
      engine.signalApproval(runId, gateRow.id, action, randomUUID()),
    ).rejects.toMatchObject({
      code: "approval_conflict",
    });
  }
  expect(await currentRunState()).toBe("paused");

  await engine.resumeRun(runId);
  expect(await currentRunState()).toBe("awaiting_approval");
});
// The adapter pauses the run right after the first prompt is accepted. After resumeRun the
// spec phase is expected to have been attempted a second time and to reach its approval gate.
it("repairs an active phase that paused after prompt acceptance but before prompt proof", async () => {
  client = createDbClient(databaseUrl);
  const db = client.db;
  await seedDevelopmentRegistry(db);

  const workspaceDir = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
  const fixtureRepo = createGitRepo();
  tempRoots.push(workspaceDir, fixtureRepo);

  const pausingAdapter = new PausesAfterPromptAcceptedFakeAdapter(db);
  const engine = new DbRunEngine({
    db,
    sessions: sessionRuntime(db, pausingAdapter),
    workspaceRoot: workspaceDir,
    maxConcurrentRuns: 100,
    wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
  });
  const started = await engine.startRun({
    requirementsMd: "Pause during prompt send and then resume the active phase.",
    repoPath: fixtureRepo,
    baseBranch: "main",
  });
  const runId = started.runId;
  runIds.push(runId);

  // Before resume: run paused, spec phase still marked running on its first attempt.
  const beforeResume = await engine.getStatus(runId);
  expect(beforeResume.run.state).toBe("paused");
  const specBefore = beforeResume.phases.find(({ phaseKey }) => phaseKey === "spec");
  expect(specBefore).toMatchObject({
    attempts: 1,
    state: "running",
  });

  await engine.resumeRun(runId);

  // After resume: second attempt, waiting on the spec gate.
  const afterResume = await engine.getStatus(runId);
  expect(afterResume.run.state).toBe("awaiting_approval");
  const specAfter = afterResume.phases.find(({ phaseKey }) => phaseKey === "spec");
  expect(specAfter).toMatchObject({
    attempts: 2,
    state: "awaiting_approval",
  });
  expect(pendingApproval(afterResume, "spec_approved")).toBeDefined();
});
// With both the spec scenario and its repair scenario set to `invalid`, startRun is expected
// to leave the run paused behind an `artifact_invalid_after_repair` gate that only a
// `reject` decision can resolve.
it("preserves a human-required artifact gate raised during startRun", async () => {
// Arrange: seeded registry, a fresh workspace dir and a git fixture repo (both queued for cleanup).
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Keep the run paused when artifact repair is exhausted.",
repoPath,
baseBranch: "main",
scenarios: {
spec: { scenario: "invalid", repairScenario: "invalid" },
},
});
runIds.push(runId);
// Expected state right after startRun: paused, no report yet, spec failed, one pending gate.
const status = await engine.getStatus(runId);
expect(status.run.state).toBe("paused");
expect(status.run.finalReportPath).toBeNull();
expect(status.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([
["spec", "failed"],
["phase_plan", "pending"],
]);
expect(status.approvals).toMatchObject([
{
gateKey: "artifact_invalid_after_repair",
state: "pending",
},
]);
// resumeRun, `approve` and `request_changes` must all be refused with approval_conflict.
await expect(engine.resumeRun(runId)).rejects.toMatchObject({
code: "approval_conflict",
});
const approvalId = status.approvals[0]?.id;
expect(approvalId).toBeDefined();
if (approvalId === undefined) {
throw new Error("approval id missing");
}
await expect(
engine.signalApproval(runId, approvalId, "approve", randomUUID()),
).rejects.toMatchObject({
code: "approval_conflict",
});
await expect(
engine.signalApproval(runId, approvalId, "request_changes", randomUUID()),
).rejects.toMatchObject({
code: "approval_conflict",
});
expect((await engine.getStatus(runId)).run.state).toBe("paused");
// `reject` is accepted: the run is expected to fail and to get a final report path.
await engine.signalApproval(runId, approvalId, "reject", randomUUID());
const rejected = await engine.getStatus(runId);
expect(rejected.run.state).toBe("failed");
expect(rejected.run.finalReportPath).toMatch(/\.report\.md$/);
});
// Uses the `unbound-role` phase-plan scenario: approving the phase plan is expected to
// reject with `internal_state_corruption`, leave the run `failed` with a final report on
// disk, add no phases beyond spec/phase_plan, and mark the run's sessions FAILED_NEEDS_HUMAN.
it("fails terminally with a report when approval-triggered advancement hits a fatal plan error", async () => {
  // Arrange: seeded registry, a fresh workspace dir and a git fixture repo (both queued for cleanup).
  client = createDbClient(databaseUrl);
  await seedDevelopmentRegistry(client.db);
  const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
  const repoPath = createGitRepo();
  tempRoots.push(workspaceRoot, repoPath);
  const engine = new DbRunEngine({
    db: client.db,
    sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
    workspaceRoot,
    maxConcurrentRuns: 100,
    wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
  });
  const { runId } = await engine.startRun({
    requirementsMd: "Fail when the phase plan has an unbound role.",
    repoPath,
    baseBranch: "main",
    scenarios: {
      phase_plan: "unbound-role",
    },
  });
  runIds.push(runId);

  // Approve the spec gate, then expect the phase-plan approval itself to reject.
  const specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved");
  await engine.signalApproval(runId, specApproval.id, "approve", randomUUID());
  const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved");
  await expect(
    engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID()),
  ).rejects.toMatchObject({
    code: "internal_state_corruption",
  });

  const failed = await engine.getStatus(runId);
  expect(failed.run.state).toBe("failed");
  expect(failed.run.finalReportPath).toMatch(/\.report\.md$/);
  expect(failed.phases.map((phase) => phase.phaseKey)).toEqual(["spec", "phase_plan"]);
  if (failed.run.finalReportPath === null) {
    throw new Error("final report missing");
  }
  expect(existsSync(failed.run.finalReportPath)).toBe(true);

  const failedSessions = await client.db
    .select({ state: tuiSessions.state })
    .from(tuiSessions)
    .where(eq(tuiSessions.runId, runId));
  // `every` is vacuously true on an empty array, so require at least one session row first;
  // otherwise this check would pass even if no sessions were recorded at all.
  expect(failedSessions.length).toBeGreaterThan(0);
  expect(failedSessions.every((session) => session.state === "FAILED_NEEDS_HUMAN")).toBe(true);
});
// Uses the `colliding-spec` phase-plan scenario: approving the plan is expected to reject
// with `internal_state_corruption`, fail the run, and add no phases beyond spec/phase_plan.
it("fails planned phase key collisions instead of silently skipping work", async () => {
// Arrange: seeded registry, a fresh workspace dir and a git fixture repo (both queued for cleanup).
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Fail when planned phase keys collide with template phase keys.",
repoPath,
baseBranch: "main",
scenarios: {
phase_plan: "colliding-spec",
},
});
runIds.push(runId);
// Approve the spec gate, then expect the phase-plan approval itself to reject.
const specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved");
await engine.signalApproval(runId, specApproval.id, "approve", randomUUID());
const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved");
await expect(
engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID()),
).rejects.toMatchObject({
code: "internal_state_corruption",
});
// Only the two template phases may exist afterwards.
const failed = await engine.getStatus(runId);
expect(failed.run.state).toBe("failed");
expect(failed.phases.map((phase) => phase.phaseKey)).toEqual(["spec", "phase_plan"]);
});
// Uses the `mixed-unbound-role` phase-plan scenario: approving the plan is expected to reject
// with `internal_state_corruption`, fail the run, and add no phases beyond spec/phase_plan.
it("validates every planned phase role before inserting dynamic phases", async () => {
// Arrange: seeded registry, a fresh workspace dir and a git fixture repo (both queued for cleanup).
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Fail when any planned role is unbound.",
repoPath,
baseBranch: "main",
scenarios: {
phase_plan: "mixed-unbound-role",
},
});
runIds.push(runId);
// Approve the spec gate, then expect the phase-plan approval itself to reject.
const specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved");
await engine.signalApproval(runId, specApproval.id, "approve", randomUUID());
const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved");
await expect(
engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID()),
).rejects.toMatchObject({
code: "internal_state_corruption",
});
// No planned phase row may have been inserted: only the two template phases exist.
const failed = await engine.getStatus(runId);
expect(failed.run.state).toBe("failed");
expect(failed.phases.map((phase) => phase.phaseKey)).toEqual(["spec", "phase_plan"]);
});
// Uses the `unknown-schema` phase-plan scenario: approving the plan is expected to reject with
// `fake_fixture_missing`, fail the run with a `failed` final report, and leave every
// session of the run in FAILED_NEEDS_HUMAN.
it("marks every session failed when a later planned phase fails fatally", async () => {
// Arrange: seeded registry, a fresh workspace dir and a git fixture repo (both queued for cleanup).
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Fatal planned phase failures should clean every session.",
repoPath,
baseBranch: "main",
scenarios: {
phase_plan: "unknown-schema",
},
});
runIds.push(runId);
// Approve the spec gate, then expect the phase-plan approval itself to reject.
const specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved");
await engine.signalApproval(runId, specApproval.id, "approve", randomUUID());
const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved");
await expect(
engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID()),
).rejects.toMatchObject({
code: "fake_fixture_missing",
});
const failed = await engine.getStatus(runId);
expect(failed.run.state).toBe("failed");
expect(failed.run.finalReportPath).toMatch(/\.report\.md$/);
if (failed.run.finalReportPath === null) {
throw new Error("expected failed run to write a final report");
}
expect(existsSync(failed.run.finalReportPath)).toBe(true);
// The JSON report is read from the markdown report's path with a .report.json suffix.
const finalReport = JSON.parse(
readFileSync(failed.run.finalReportPath.replace(/\.report\.md$/, ".report.json"), "utf8"),
) as { status?: unknown };
expect(finalReport.status).toBe("failed");
const sessions = await client.db
.select({ roleId: tuiSessions.roleId, state: tuiSessions.state })
.from(tuiSessions)
.where(eq(tuiSessions.runId, runId));
// The length check keeps the `every` assertion below from passing vacuously.
expect(sessions.length).toBeGreaterThanOrEqual(2);
expect(sessions.every((session) => session.state === "FAILED_NEEDS_HUMAN")).toBe(true);
});
// Same `unknown-schema` scenario, but the plan approval goes through signalApprovalForWorkflow
// and the run is then driven with advanceRunUntilBlocked, which is expected to reject with
// `fake_fixture_missing` while still leaving a failed run with a final report path.
it("repairs final reports when direct advance sees a terminalized fatal phase", async () => {
// Arrange: seeded registry, a fresh workspace dir and a git fixture repo (both queued for cleanup).
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Direct advance should repair terminal reports.",
repoPath,
baseBranch: "main",
scenarios: {
phase_plan: "unknown-schema",
},
});
runIds.push(runId);
const specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved");
await engine.signalApproval(runId, specApproval.id, "approve", randomUUID());
const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved");
// This call is expected to resolve; the failure surfaces from the explicit advance below.
await engine.signalApprovalForWorkflow(runId, phasePlanApproval.id, "approve", randomUUID());
await expect(engine.advanceRunUntilBlocked(runId)).rejects.toMatchObject({
code: "fake_fixture_missing",
});
const failed = await engine.getStatus(runId);
expect(failed.run.state).toBe("failed");
expect(failed.run.finalReportPath).toMatch(/\.report\.md$/);
});
// Hand-crafts a run that already has an approved spec gate, one running phase and one
// pending phase, then replays the same approve decision and verifies that neither phase
// row was touched.
it("does not start another pending phase when approval replay sees active work", async () => {
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Approval replay must not run phases concurrently.",
repoPath,
baseBranch: "main",
});
runIds.push(runId);
const status = await engine.getStatus(runId);
const specApproval = pendingApproval(status, "spec_approved");
const token = randomUUID();
// Write the approve decision straight into the database instead of going through the engine.
// NOTE(review): the idempotency key format is assumed to match what the engine generates,
// so the later signalApproval call with the same token is seen as a replay — confirm.
await client.db.insert(approvalDecisions).values({
approvalRequestId: specApproval.id,
action: "approve",
idempotencyKey: `${specApproval.id}:approve:${token}`,
});
// Mark the request itself as already approved.
await client.db
.update(approvalRequests)
.set({ state: "approved", resolvedAt: new Date() })
.where(eq(approvalRequests.id, specApproval.id));
const activePhaseId = randomUUID();
const pendingPhaseId = randomUUID();
// Add one phase that is currently running and one that is still pending.
await client.db.insert(runPhases).values([
{
id: activePhaseId,
runId,
phaseKey: "implement-a",
seq: 3,
state: "running",
attempts: 1,
},
{
id: pendingPhaseId,
runId,
phaseKey: "implement-b",
seq: 4,
state: "pending",
},
]);
// Point the run at the running phase and move it into the executing state.
await client.db
.update(runs)
.set({ state: "executing", currentPhaseId: activePhaseId })
.where(eq(runs.id, runId));
// The spec phase is recorded as completed to match the approved gate.
await client.db
.update(runPhases)
.set({ state: "completed", endedAt: new Date() })
.where(and(eq(runPhases.runId, runId), eq(runPhases.phaseKey, "spec")));
// Replay the approve decision with the same token used for the pre-inserted decision row.
await engine.signalApproval(runId, specApproval.id, "approve", token);
const phases = await client.db
.select({
phaseKey: runPhases.phaseKey,
state: runPhases.state,
attempts: runPhases.attempts,
})
.from(runPhases)
.where(eq(runPhases.runId, runId));
// The running phase keeps its state and attempt count.
expect(phases.find((phase) => phase.phaseKey === "implement-a")).toMatchObject({
state: "running",
attempts: 1,
});
// The pending phase was not started (attempts was not set on insert; 0 is presumably the column default).
expect(phases.find((phase) => phase.phaseKey === "implement-b")).toMatchObject({
state: "pending",
attempts: 0,
});
});
// Requests changes on the first spec approval, then approves the regenerated spec and the
// phase plan, checking phase attempt counters and the approval history along the way.
it("reruns the same phase when approval requests changes", async () => {
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Request changes to the spec before approving it.",
repoPath,
baseBranch: "main",
});
runIds.push(runId);
const firstApproval = (await engine.getStatus(runId)).approvals[0];
expect(firstApproval).toBeDefined();
// Explicit guard so the compiler narrows firstApproval after the assertion above.
if (firstApproval === undefined) {
throw new Error("first approval missing");
}
await engine.signalApproval(runId, firstApproval.id, "request_changes", randomUUID());
const changed = await engine.getStatus(runId);
// The run is waiting on approval again; spec is on attempt 2 and phase_plan has not run.
expect(changed.run.state).toBe("awaiting_approval");
expect(changed.phases.map((phase) => [phase.phaseKey, phase.state, phase.attempts])).toEqual([
["spec", "awaiting_approval", 2],
["phase_plan", "pending", 0],
]);
// The original request is kept as changes_requested, followed by a fresh pending request.
expect(changed.approvals.map((approval) => [approval.gateKey, approval.state])).toEqual([
["spec_approved", "changes_requested"],
["spec_approved", "pending"],
]);
const secondApproval = changed.approvals.find((approval) => approval.state === "pending");
expect(secondApproval).toBeDefined();
if (secondApproval === undefined) {
throw new Error("second approval missing");
}
// Approve the regenerated spec, then the phase plan.
await engine.signalApproval(runId, secondApproval.id, "approve", randomUUID());
const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved");
await engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID());
const completed = await engine.getStatus(runId);
// The run finishes; only the spec phase shows the extra attempt.
expect(completed.run.state).toBe("completed");
expect(completed.phases.map((phase) => [phase.phaseKey, phase.state, phase.attempts])).toEqual([
["spec", "completed", 2],
["phase_plan", "completed", 1],
["implement", "completed", 1],
]);
});
// Pauses and then aborts a run that is waiting on its spec approval. Verifies that the
// approval, phases and session are terminalized without recording an approval decision or
// an "approval.resolved" event, and that approving the stale request afterwards is rejected.
it("aborts pending approvals and sessions, and stale approvals cannot resume the run", async () => {
  client = createDbClient(databaseUrl);
  await seedDevelopmentRegistry(client.db);
  const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
  const repoPath = createGitRepo();
  tempRoots.push(workspaceRoot, repoPath);
  const engine = new DbRunEngine({
    db: client.db,
    sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
    workspaceRoot,
    maxConcurrentRuns: 100,
    wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
  });
  const { runId } = await engine.startRun({
    requirementsMd: "Abort while waiting for approval.",
    repoPath,
    baseBranch: "main",
  });
  runIds.push(runId);
  // Fetched through the module-level pendingApproval helper and stored under a distinct
  // name, so the helper is no longer shadowed by a local binding of the same name.
  const specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved");

  // Pause first so the abort has a pausedFromState to clear (asserted below).
  await engine.pauseRun(runId);
  expect((await engine.getStatus(runId)).run.state).toBe("paused");

  await engine.abortRun(runId, "user requested abort");
  const aborted = await engine.getStatus(runId);
  expect(aborted.run.state).toBe("aborted");
  expect(aborted.approvals).toMatchObject([{ state: "aborted" }]);

  // Aborting must not emit an approval resolution event...
  const abortEvents = await client.db
    .select({ type: runEvents.type })
    .from(runEvents)
    .where(eq(runEvents.runId, runId))
    .orderBy(runEvents.seq);
  expect(abortEvents.map((event) => event.type)).not.toContain("approval.resolved");

  // ...nor persist a decision row for the aborted request.
  const abortDecisions = await client.db
    .select()
    .from(approvalDecisions)
    .where(eq(approvalDecisions.approvalRequestId, specApproval.id));
  expect(abortDecisions).toEqual([]);

  expect(aborted.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([
    ["spec", "failed"],
    ["phase_plan", "pending"],
  ]);

  const sessions = await client.db
    .select({ state: tuiSessions.state })
    .from(tuiSessions)
    .where(eq(tuiSessions.runId, runId));
  expect(sessions).toEqual([{ state: "FAILED_NEEDS_HUMAN" }]);

  const [abortedRow] = await client.db
    .select({ pausedFromState: runs.pausedFromState })
    .from(runs)
    .where(eq(runs.id, runId));
  expect(abortedRow?.pausedFromState).toBeNull();

  // Approving the now-aborted request must be refused and leave the run aborted.
  await expect(
    engine.signalApproval(runId, specApproval.id, "approve", randomUUID()),
  ).rejects.toMatchObject({
    code: "approval_conflict",
  });
  expect((await engine.getStatus(runId)).run.state).toBe("aborted");
});
// With an adapter whose dispose fails, abortRun must reject with the adapter's error code
// while the run is still recorded as aborted and has a final report path.
it("surfaces session dispose failures during abort", async () => {
  client = createDbClient(databaseUrl);
  const { db } = client;
  await seedDevelopmentRegistry(db);

  const workspaceTemplate = join(tmpdir(), "devflow-engine-workspace-");
  const workspaceRoot = realpathSync(mkdtempSync(workspaceTemplate));
  const repoPath = createGitRepo();
  tempRoots.push(workspaceRoot, repoPath);

  const failingAdapter = new DisposeFailsFakeAdapter({ writeDelayMs: 0 });
  const engine = new DbRunEngine({
    db,
    sessions: sessionRuntime(db, failingAdapter),
    workspaceRoot,
    maxConcurrentRuns: 100,
    wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
  });

  const started = await engine.startRun({
    requirementsMd: "Abort while waiting for approval.",
    repoPath,
    baseBranch: "main",
  });
  const runId = started.runId;
  runIds.push(runId);

  // The dispose failure is surfaced to the caller...
  const abortAttempt = engine.abortRun(runId, "user requested abort");
  await expect(abortAttempt).rejects.toMatchObject({
    code: "pane_briefly_unresponsive",
  });

  // ...but the run itself has already been terminalized and reported.
  const { run: abortedRun } = await engine.getStatus(runId);
  expect(abortedRun.state).toBe("aborted");

  const reportRows = await db
    .select({ finalReportPath: runs.finalReportPath })
    .from(runs)
    .where(eq(runs.id, runId));
  expect(reportRows[0]?.finalReportPath).toMatch(/\.report\.md$/);
});
// Aborting must capture the session transcript before disposing the session, and the
// stored capture sequence must have advanced past zero.
it("captures terminal session transcripts before abort disposal", async () => {
  client = createDbClient(databaseUrl);
  const { db } = client;
  await seedDevelopmentRegistry(db);

  const workspaceTemplate = join(tmpdir(), "devflow-engine-workspace-");
  const workspaceRoot = realpathSync(mkdtempSync(workspaceTemplate));
  const repoPath = createGitRepo();
  tempRoots.push(workspaceRoot, repoPath);

  const recordingAdapter = new CaptureOrderingFakeAdapter({ writeDelayMs: 0 });
  const engine = new DbRunEngine({
    db,
    sessions: sessionRuntime(db, recordingAdapter),
    workspaceRoot,
    maxConcurrentRuns: 100,
    wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
  });

  const started = await engine.startRun({
    requirementsMd: "Abort after capturing terminal transcript.",
    repoPath,
    baseBranch: "main",
  });
  const runId = started.runId;
  runIds.push(runId);

  // Drop whatever the adapter recorded during startRun so only abort activity is asserted.
  recordingAdapter.events.length = 0;
  await engine.abortRun(runId, "user requested abort");
  expect(recordingAdapter.events).toEqual(["capture", "dispose"]);

  const sessionRows = await db
    .select({ lastCaptureSeq: tuiSessions.lastCaptureSeq })
    .from(tuiSessions)
    .where(eq(tuiSessions.runId, runId));
  expect(sessionRows[0]?.lastCaptureSeq).toBeGreaterThan(0n);
});
// Rejects the first approval twice with the same client token and checks that the second
// call resolves without error, performs capture + dispose again, and leaves the run failed.
it("retries terminal approval cleanup idempotently when a decision is replayed", async () => {
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
// Adapter is defined outside this block; its events array records the calls asserted below.
const adapter = new CaptureFailsAfterDisposeFakeAdapter({ writeDelayMs: 0 });
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, adapter),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Replay a terminal approval decision.",
repoPath,
baseBranch: "main",
});
runIds.push(runId);
const approvalId = (await engine.getStatus(runId)).approvals[0]?.id;
expect(approvalId).toBeDefined();
// Explicit guard so the compiler narrows approvalId after the assertion above.
if (approvalId === undefined) {
throw new Error("approval id missing");
}
const clientToken = randomUUID();
// Reset recorded events so only the reject's cleanup is observed.
adapter.events.length = 0;
await engine.signalApproval(runId, approvalId, "reject", clientToken);
expect(adapter.events).toEqual(["capture", "dispose"]);
// Replay the identical decision: it must succeed and run the same cleanup sequence again.
adapter.events.length = 0;
await expect(
engine.signalApproval(runId, approvalId, "reject", clientToken),
).resolves.toBeUndefined();
expect(adapter.events).toEqual(["capture", "dispose"]);
expect((await engine.getStatus(runId)).run.state).toBe("failed");
});
// Overwrites the stored session's pid/tmux fields in the database, aborts the run, and
// checks that the handles passed to capture and dispose carry those persisted values.
it("uses persisted tmux handles when capturing and disposing terminal sessions", async () => {
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
// Recording runtime is defined outside this block; it exposes captureHandles/disposeHandles.
const sessions = new TerminalHandleRecordingRuntime();
const engine = new DbRunEngine({
db: client.db,
sessions,
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Abort after persisting tmux handle fields.",
repoPath,
baseBranch: "main",
});
runIds.push(runId);
// Write recognizable handle values into every session row of this run.
await client.db
.update(tuiSessions)
.set({
lastCaptureSeq: 1n,
lastKnownPanePid: 777,
tmuxSession: "persisted-session",
tmuxWindow: "persisted-window",
})
.where(eq(tuiSessions.runId, runId));
await engine.abortRun(runId, "user requested abort");
// The capture handle carries the persisted pid and tmux names, plus a transcript baseline
// whose startSeq equals the lastCaptureSeq written above and that holds at least one line.
expect(sessions.captureHandles).toContainEqual({
sessionId: expect.any(String),
pid: 777,
transcriptBaseline: {
startSeq: 1n,
lines: expect.arrayContaining([expect.any(String)]),
},
tmuxSession: "persisted-session",
tmuxWindow: "persisted-window",
});
// The dispose handle carries the same persisted values but no transcript baseline.
expect(sessions.disposeHandles).toContainEqual({
sessionId: expect.any(String),
pid: 777,
tmuxSession: "persisted-session",
tmuxWindow: "persisted-window",
});
});
// Makes transcript capture fail during abort and checks that dispose is still attempted,
// the run is aborted with a report, and a second abort with capture restored re-runs cleanup.
it("attempts disposal when transcript capture fails during cleanup", async () => {
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const adapter = new CaptureOrderingFakeAdapter({ writeDelayMs: 0 });
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, adapter),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Abort with failed transcript capture.",
repoPath,
baseBranch: "main",
});
runIds.push(runId);
// Clear events recorded during startRun and switch the adapter into capture-failure mode.
adapter.events.length = 0;
adapter.failCapture = true;
await expect(engine.abortRun(runId, "user requested abort")).rejects.toMatchObject({
code: "pane_briefly_unresponsive",
});
// Dispose was still attempted after the capture attempt.
expect(adapter.events).toEqual(["capture", "dispose"]);
const [run] = await client.db
.select({ finalReportPath: runs.finalReportPath, state: runs.state })
.from(runs)
.where(eq(runs.id, runId));
// The run was terminalized and reported even though abortRun rejected.
expect(run).toMatchObject({ state: "aborted" });
expect(run?.finalReportPath).toMatch(/\.report\.md$/);
// Retrying the abort with capture working again succeeds and repeats capture + dispose.
adapter.events.length = 0;
adapter.failCapture = false;
await engine.abortRun(runId, "retry abort cleanup");
expect(adapter.events).toEqual(["capture", "dispose"]);
});
// Rejecting the spec approval with an adapter whose dispose fails must reject the call,
// yet the run row must already be failed and carry a final report path.
it("writes a failed final report before surfacing approval reject dispose failures", async () => {
  client = createDbClient(databaseUrl);
  const { db } = client;
  await seedDevelopmentRegistry(db);

  const workspaceTemplate = join(tmpdir(), "devflow-engine-workspace-");
  const workspaceRoot = realpathSync(mkdtempSync(workspaceTemplate));
  const repoPath = createGitRepo();
  tempRoots.push(workspaceRoot, repoPath);

  const failingAdapter = new DisposeFailsFakeAdapter({ writeDelayMs: 0 });
  const engine = new DbRunEngine({
    db,
    sessions: sessionRuntime(db, failingAdapter),
    workspaceRoot,
    maxConcurrentRuns: 100,
    wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
  });

  const started = await engine.startRun({
    requirementsMd: "Reject while waiting for approval.",
    repoPath,
    baseBranch: "main",
  });
  const runId = started.runId;
  runIds.push(runId);

  const statusBeforeReject = await engine.getStatus(runId);
  const request = pendingApproval(statusBeforeReject, "spec_approved");

  const rejectAttempt = engine.signalApproval(runId, request.id, "reject", randomUUID());
  await expect(rejectAttempt).rejects.toMatchObject({
    code: "pane_briefly_unresponsive",
  });

  const runRows = await db
    .select({ finalReportPath: runs.finalReportPath, state: runs.state })
    .from(runs)
    .where(eq(runs.id, runId));
  const persistedRun = runRows[0];
  expect(persistedRun?.state).toBe("failed");
  expect(persistedRun?.finalReportPath).toMatch(/\.report\.md$/);
});
// Same expectation as the direct reject path, but the rejection is sent through
// signalApprovalForWorkflow: the call rejects, the run is failed, and a report path exists.
it("writes a failed final report before surfacing workflow approval reject dispose failures", async () => {
  client = createDbClient(databaseUrl);
  const { db } = client;
  await seedDevelopmentRegistry(db);

  const workspaceTemplate = join(tmpdir(), "devflow-engine-workspace-");
  const workspaceRoot = realpathSync(mkdtempSync(workspaceTemplate));
  const repoPath = createGitRepo();
  tempRoots.push(workspaceRoot, repoPath);

  const failingAdapter = new DisposeFailsFakeAdapter({ writeDelayMs: 0 });
  const engine = new DbRunEngine({
    db,
    sessions: sessionRuntime(db, failingAdapter),
    workspaceRoot,
    maxConcurrentRuns: 100,
    wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
  });

  const started = await engine.startRun({
    requirementsMd: "Reject through workflow approval path.",
    repoPath,
    baseBranch: "main",
  });
  const runId = started.runId;
  runIds.push(runId);

  const statusBeforeReject = await engine.getStatus(runId);
  const request = pendingApproval(statusBeforeReject, "spec_approved");

  const rejectAttempt = engine.signalApprovalForWorkflow(
    runId,
    request.id,
    "reject",
    randomUUID(),
  );
  await expect(rejectAttempt).rejects.toMatchObject({
    code: "pane_briefly_unresponsive",
  });

  const runRows = await db
    .select({ finalReportPath: runs.finalReportPath, state: runs.state })
    .from(runs)
    .where(eq(runs.id, runId));
  const persistedRun = runRows[0];
  expect(persistedRun?.state).toBe("failed");
  expect(persistedRun?.finalReportPath).toMatch(/\.report\.md$/);
});
// Leaves a run waiting on its spec approval, runs sweepM4ProcessRestart for that run id,
// and verifies the run, its approval, phases and session are all terminalized without an
// approval decision or "approval.resolved" event being recorded.
it("sweeps non-terminal M4 runs on API startup recovery", async () => {
  client = createDbClient(databaseUrl);
  await seedDevelopmentRegistry(client.db);
  const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
  const repoPath = createGitRepo();
  tempRoots.push(workspaceRoot, repoPath);
  const engine = new DbRunEngine({
    db: client.db,
    sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
    workspaceRoot,
    maxConcurrentRuns: 100,
    wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
  });
  const { runId } = await engine.startRun({
    requirementsMd: "Leave a non-terminal run for restart recovery.",
    repoPath,
    baseBranch: "main",
  });
  runIds.push(runId);

  // Only this run's id is passed to the sweep.
  const swept = await sweepM4ProcessRestart(client.db, { runIds: [runId] });
  expect(swept.sweptRunIds).toContain(runId);

  const recovered = await engine.getStatus(runId);
  expect(recovered.run.state).toBe("failed");
  // The sweep is expected to leave no final report and no current phase on the run.
  expect(recovered.run.finalReportPath).toBeNull();
  expect(recovered.run.currentPhaseId).toBeNull();
  expect(recovered.approvals).toMatchObject([{ state: "aborted" }]);
  expect(recovered.eventsTail.map((event) => event.type)).not.toContain("approval.resolved");

  // Fail with a clear message if the approval is missing, instead of silently querying
  // the decisions table with an empty-string id.
  const [recoveredApproval] = recovered.approvals;
  if (recoveredApproval === undefined) {
    throw new Error("recovered approval missing");
  }
  const recoveredDecisions = await client.db
    .select()
    .from(approvalDecisions)
    .where(eq(approvalDecisions.approvalRequestId, recoveredApproval.id));
  expect(recoveredDecisions).toEqual([]);

  expect(recovered.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([
    ["spec", "failed"],
    ["phase_plan", "pending"],
  ]);

  const sessions = await client.db
    .select({ state: tuiSessions.state })
    .from(tuiSessions)
    .where(eq(tuiSessions.runId, runId));
  expect(sessions).toEqual([{ state: "FAILED_NEEDS_HUMAN" }]);
  expect(recovered.eventsTail.map((event) => event.type)).toContain("run.failed");
});
// Runs phase planning with "invalid" primary and repair scenarios and checks that, after
// the spec is approved, the run is paused with pausedFromState "planning" and the
// phase_plan phase is failed.
it("records planning as the paused-from state when phase planning needs human recovery", async () => {
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Phase planning should use the planning run state.",
repoPath,
baseBranch: "main",
scenarios: {
// Both the initial and the repair scenario are "invalid" (meaning defined by the fake adapter).
phase_plan: { scenario: "invalid", repairScenario: "invalid" },
},
});
runIds.push(runId);
const approval = (await engine.getStatus(runId)).approvals[0];
expect(approval).toBeDefined();
// Explicit guard so the compiler narrows approval after the assertion above.
if (approval === undefined) {
throw new Error("approval missing");
}
// Approving the first gate lets the engine proceed into phase planning.
await engine.signalApproval(runId, approval.id, "approve", randomUUID());
const [run] = await client.db
.select({ state: runs.state, pausedFromState: runs.pausedFromState })
.from(runs)
.where(eq(runs.id, runId));
expect(run).toMatchObject({ state: "paused", pausedFromState: "planning" });
const status = await engine.getStatus(runId);
expect(status.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([
["spec", "completed"],
["phase_plan", "failed"],
]);
});
// A client token already used to approve a request must not be accepted again with a
// different action; the second call is expected to fail with approval_conflict.
it("rejects reuse of an approval client token with a different action", async () => {
  client = createDbClient(databaseUrl);
  const { db } = client;
  await seedDevelopmentRegistry(db);

  const workspaceTemplate = join(tmpdir(), "devflow-engine-workspace-");
  const workspaceRoot = realpathSync(mkdtempSync(workspaceTemplate));
  const repoPath = createGitRepo();
  tempRoots.push(workspaceRoot, repoPath);

  const fakeAdapter = new FakeSessionAdapter({ writeDelayMs: 0 });
  const engine = new DbRunEngine({
    db,
    sessions: sessionRuntime(db, fakeAdapter),
    workspaceRoot,
    maxConcurrentRuns: 100,
    wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
  });

  const started = await engine.startRun({
    requirementsMd: "Check approval token conflict.",
    repoPath,
    baseBranch: "main",
  });
  const runId = started.runId;
  runIds.push(runId);

  // Look the pending request up directly in the database.
  const isPendingForRun = and(
    eq(approvalRequests.runId, runId),
    eq(approvalRequests.state, "pending"),
  );
  const pendingRequests = await db
    .select({ id: approvalRequests.id })
    .from(approvalRequests)
    .where(isPendingForRun);
  const request = pendingRequests[0];
  expect(request).toBeDefined();
  if (request === undefined) {
    throw new Error("approval request missing");
  }

  const sharedToken = randomUUID();
  await engine.signalApproval(runId, request.id, "approve", sharedToken);

  const conflictingAttempt = engine.signalApproval(runId, request.id, "reject", sharedToken);
  await expect(conflictingAttempt).rejects.toMatchObject({
    code: "approval_conflict",
  });
});
// Approves with the token "prefix:shared-token" and then with "shared-token"; the second
// call must be rejected as a conflict rather than being matched as a replay of the first.
it("does not treat a client token suffix as an approval replay", async () => {
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Check approval token suffix handling.",
repoPath,
baseBranch: "main",
});
runIds.push(runId);
// Look the pending request up directly in the database.
const [request] = await client.db
.select({ id: approvalRequests.id })
.from(approvalRequests)
.where(and(eq(approvalRequests.runId, runId), eq(approvalRequests.state, "pending")));
expect(request).toBeDefined();
// Explicit guard so the compiler narrows request after the assertion above.
if (request === undefined) {
throw new Error("approval request missing");
}
// The second token is a strict suffix of the first one.
await engine.signalApproval(runId, request.id, "approve", "prefix:shared-token");
await expect(
engine.signalApproval(runId, request.id, "approve", "shared-token"),
).rejects.toMatchObject({
code: "approval_conflict",
});
});
// Counts adapter dispose calls across an initial reject, a replay of the same reject, and
// an explicit replayAppliedApprovalSideEffects call; each one is expected to dispose again.
it("replays terminal approval cleanup side effects idempotently", async () => {
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
tempRoots.push(workspaceRoot, repoPath);
// Adapter is defined outside this block; disposeCalls is the counter asserted below.
const adapter = new DisposeCountingFakeAdapter({ writeDelayMs: 0 });
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, adapter),
workspaceRoot,
maxConcurrentRuns: 100,
wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 },
});
const { runId } = await engine.startRun({
requirementsMd: "Reject and replay disposal.",
repoPath,
baseBranch: "main",
});
runIds.push(runId);
const request = pendingApproval(await engine.getStatus(runId), "spec_approved");
const clientToken = randomUUID();
// First reject disposes once.
await engine.signalApproval(runId, request.id, "reject", clientToken);
expect(adapter.disposeCalls).toBe(1);
// Replaying the same decision (same token) disposes again without throwing.
await engine.signalApproval(runId, request.id, "reject", clientToken);
expect(adapter.disposeCalls).toBe(2);
// Invoking the side-effect replay entry point directly disposes a third time.
await engine.replayAppliedApprovalSideEffects(runId, "reject");
expect(adapter.disposeCalls).toBe(3);
});
// Inserts an already-aborted run row that has no final report, replays applied approval
// side effects, and checks that a report path is stored and that the JSON report next to
// it names this run with status "aborted".
it("repairs missing aborted final reports during applied approval replay", async () => {
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-worktree-")));
tempRoots.push(workspaceRoot, repoPath, worktreeRoot);
// The run row needs the id and hash of the seeded "development" template.
const [template] = await client.db
.select({ hash: workflowTemplates.hash, id: workflowTemplates.id })
.from(workflowTemplates)
.where(eq(workflowTemplates.name, "development"))
.limit(1);
if (template === undefined) {
throw new Error("development template missing");
}
const runId = randomUUID();
runIds.push(runId);
// Create the run directly in its terminal state, bypassing engine.startRun.
await client.db.insert(runs).values({
id: runId,
templateId: template.id,
templateHash: template.hash,
state: "aborted",
repoPath,
baseBranch: "main",
worktreeRoot,
endedAt: new Date(),
finalReportPath: null,
});
// No wait options are passed here, unlike the tests that start a run through the engine.
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
});
await engine.replayAppliedApprovalSideEffects(runId, "approve");
const [run] = await client.db
.select({ finalReportPath: runs.finalReportPath })
.from(runs)
.where(eq(runs.id, runId));
expect(run?.finalReportPath).toMatch(/\.report\.md$/);
// Guard narrows finalReportPath to string for the file read below.
if (run?.finalReportPath === null || run?.finalReportPath === undefined) {
throw new Error("final report was not repaired");
}
// The JSON report is read from the same path with the .report.json suffix.
expect(
JSON.parse(
readFileSync(run.finalReportPath.replace(/\.report\.md$/, ".report.json"), "utf8"),
),
).toMatchObject({ runId, status: "aborted" });
});
// Inserts an aborted run without a final report plus one session row, then replays applied
// approval side effects using an adapter whose dispose fails. The replay must reject, but
// the final report path must still have been written.
it("repairs terminal final reports before surfacing approval replay dispose failures", async () => {
client = createDbClient(databaseUrl);
await seedDevelopmentRegistry(client.db);
const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-")));
const repoPath = createGitRepo();
const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-worktree-")));
tempRoots.push(workspaceRoot, repoPath, worktreeRoot);
// The run row needs the id and hash of the seeded "development" template.
const [template] = await client.db
.select({ hash: workflowTemplates.hash, id: workflowTemplates.id })
.from(workflowTemplates)
.where(eq(workflowTemplates.name, "development"))
.limit(1);
if (template === undefined) {
throw new Error("development template missing");
}
const runId = randomUUID();
runIds.push(runId);
// Create the run directly in its terminal state, bypassing engine.startRun.
await client.db.insert(runs).values({
id: runId,
templateId: template.id,
templateHash: template.hash,
state: "aborted",
repoPath,
baseBranch: "main",
worktreeRoot,
endedAt: new Date(),
finalReportPath: null,
});
// Give the run a session row so the replay has something to dispose.
await client.db.insert(tuiSessions).values({
id: randomUUID(),
runId,
roleId: "implementer",
backend: "fake",
cwd: worktreeRoot,
state: "FAILED_NEEDS_HUMAN",
});
const engine = new DbRunEngine({
db: client.db,
sessions: sessionRuntime(client.db, new DisposeFailsFakeAdapter({ writeDelayMs: 0 })),
workspaceRoot,
maxConcurrentRuns: 100,
});
await expect(engine.replayAppliedApprovalSideEffects(runId, "abort")).rejects.toMatchObject({
code: "pane_briefly_unresponsive",
});
// The report path is present despite the rejected replay.
const [run] = await client.db
.select({ finalReportPath: runs.finalReportPath })
.from(runs)
.where(eq(runs.id, runId));
expect(run?.finalReportPath).toMatch(/\.report\.md$/);
});
});
/**
 * Returns the approval in `status` that belongs to `gateKey` and is still pending.
 *
 * Asserts (via `expect`) that such an approval exists, and additionally throws so the
 * return type is narrowed to a defined approval for callers.
 *
 * @param status - Result of `DbRunEngine.getStatus` for the run under test.
 * @param gateKey - Gate key of the approval to look up.
 * @returns The first matching pending approval.
 * @throws Error when no pending approval exists for `gateKey`.
 */
function pendingApproval(status: Awaited<ReturnType<DbRunEngine["getStatus"]>>, gateKey: string) {
  const match = status.approvals.find(
    ({ gateKey: candidateKey, state }) => candidateKey === gateKey && state === "pending",
  );
  expect(match).toBeDefined();
  if (match !== undefined) {
    return match;
  }
  throw new Error(`${gateKey} approval missing`);
}
/**
 * Seeds the registry tables the engine tests rely on.
 *
 * Upserts the `development` version 1 workflow template loaded from
 * `docs/schemas/templates` (refreshing hash and definition on conflict), then inserts every
 * persona loaded from `docs/schemas/personas`, leaving existing name/version rows untouched.
 * Both paths are resolved against the current working directory.
 *
 * @param db - Database handle to write to.
 * @throws Error when the development@1 template fixture cannot be found.
 */
async function seedDevelopmentRegistry(db: DbClient["db"]) {
  const development = loadTemplateFiles(resolve("docs/schemas/templates")).find(
    (entry) => entry.name === "development" && entry.version === 1,
  );
  if (development === undefined) {
    throw new Error("development@1 template fixture is missing");
  }

  const { name, version, hash, definition } = development;
  await db
    .insert(workflowTemplates)
    .values({ name, version, hash, definition })
    .onConflictDoUpdate({
      target: [workflowTemplates.name, workflowTemplates.version],
      set: { hash, definition },
    });

  // Personas are inserted one at a time; rows that already exist are kept as they are.
  const personas = loadPersonaFiles(resolve("docs/schemas/personas"));
  for (const persona of personas) {
    const personaRow = {
      name: persona.name,
      version: persona.version,
      hash: persona.hash,
      definition: persona.definition,
    };
    await db
      .insert(agentPersonas)
      .values(personaRow)
      .onConflictDoNothing({ target: [agentPersonas.name, agentPersonas.version] });
  }
}