From 78ebd5ef78d9c4ab22af1434ea4f1878aecd8b60 Mon Sep 17 00:00:00 2001 From: chungyeong Date: Mon, 11 May 2026 00:46:45 +0900 Subject: [PATCH] feat: add minimal run engine --- apps/api/package.json | 17 + apps/api/src/index.test.ts | 746 ++++++ apps/api/src/index.ts | 128 + apps/api/src/startup.ts | 14 + apps/api/tsconfig.json | 15 + .../personas/fake-devflow-agent@1.yaml | 15 + docs/schemas/templates/development@1.yaml | 35 + packages/run-engine/src/engine.test.ts | 1098 ++++++++ packages/run-engine/src/engine.ts | 2336 +++++++++++++++++ .../run-engine/src/fake-phase-harness.test.ts | 325 ++- packages/run-engine/src/fake-phase-harness.ts | 914 +++++-- packages/run-engine/src/index.ts | 1 + packages/session/package.json | 4 +- packages/session/src/index.ts | 1 + packages/session/src/manager.ts | 410 +++ packages/session/tsconfig.json | 2 +- pnpm-lock.yaml | 16 +- .../dev/phase-plan@1/colliding-spec.json | 24 + .../dev/phase-plan@1/mixed-unbound-role.json | 24 + .../fake-artifacts/dev/phase-plan@1/ok.json | 24 + .../dev/phase-plan@1/skip-only.json | 11 + .../dev/phase-plan@1/two-phases.json | 44 + .../dev/phase-plan@1/unbound-role.json | 24 + .../dev/phase-plan@1/unknown-schema.json | 24 + tsconfig.json | 1 + vitest.workspace.ts | 1 + 26 files changed, 6045 insertions(+), 209 deletions(-) create mode 100644 apps/api/package.json create mode 100644 apps/api/src/index.test.ts create mode 100644 apps/api/src/index.ts create mode 100644 apps/api/src/startup.ts create mode 100644 apps/api/tsconfig.json create mode 100644 docs/schemas/personas/fake-devflow-agent@1.yaml create mode 100644 docs/schemas/templates/development@1.yaml create mode 100644 packages/run-engine/src/engine.test.ts create mode 100644 packages/run-engine/src/engine.ts create mode 100644 packages/session/src/manager.ts create mode 100644 tests/fixtures/fake-artifacts/dev/phase-plan@1/colliding-spec.json create mode 100644 tests/fixtures/fake-artifacts/dev/phase-plan@1/mixed-unbound-role.json create mode 
100644 tests/fixtures/fake-artifacts/dev/phase-plan@1/ok.json create mode 100644 tests/fixtures/fake-artifacts/dev/phase-plan@1/skip-only.json create mode 100644 tests/fixtures/fake-artifacts/dev/phase-plan@1/two-phases.json create mode 100644 tests/fixtures/fake-artifacts/dev/phase-plan@1/unbound-role.json create mode 100644 tests/fixtures/fake-artifacts/dev/phase-plan@1/unknown-schema.json diff --git a/apps/api/package.json b/apps/api/package.json new file mode 100644 index 0000000..9e99e90 --- /dev/null +++ b/apps/api/package.json @@ -0,0 +1,17 @@ +{ + "name": "@devflow/api", + "version": "0.0.0", + "private": true, + "type": "module", + "scripts": { + "build": "tsup src/index.ts --format esm --clean", + "typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit", + "test": "cd ../.. && vitest run --project apps/api" + }, + "dependencies": { + "@devflow/core": "workspace:*", + "@devflow/db": "workspace:*", + "@devflow/run-engine": "workspace:*", + "@devflow/session": "workspace:*" + } +} diff --git a/apps/api/src/index.test.ts b/apps/api/src/index.test.ts new file mode 100644 index 0000000..b93e222 --- /dev/null +++ b/apps/api/src/index.test.ts @@ -0,0 +1,746 @@ +import { execFileSync } from "node:child_process"; +import { randomUUID } from "node:crypto"; +import { mkdtempSync, realpathSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; + +import { + DevflowError, + type PromptEnvelope, + loadPersonaFiles, + loadTemplateFiles, +} from "@devflow/core"; +import { + type DbClient, + agentPersonas, + approvalDecisions, + approvalRequests, + createDbClient, + runEvents, + runs, + tuiSessions, + workflowTemplates, +} from "@devflow/db"; +import { DbRunEngine } from "@devflow/run-engine"; +import { FakeSessionAdapter, type SessionHandle, SessionManager } from "@devflow/session"; +import { and, eq, inArray } from "drizzle-orm"; +import { afterEach, describe, expect, it } from "vitest"; + +import { 
startApi } from "./index.js"; + +const databaseUrl = + process.env.DATABASE_URL ?? "postgres://devflow:devflow@127.0.0.1:55432/devflow"; + +class ResumeFailsFakeSessionAdapter extends FakeSessionAdapter { + resumeAttempts = 0; + + override async resume(_handle: SessionHandle): Promise { + this.resumeAttempts += 1; + throw new DevflowError("resume failed", { + class: "recoverable", + code: "pane_briefly_unresponsive", + recoveryHint: "resume failed", + }); + } +} + +class ResumeSucceedsAfterTwoFailuresFakeSessionAdapter extends FakeSessionAdapter { + resumeAttempts = 0; + + override async resume(handle: SessionHandle): Promise { + this.resumeAttempts += 1; + if (this.resumeAttempts <= 2) { + throw new DevflowError("resume failed transiently", { + class: "recoverable", + code: "pane_briefly_unresponsive", + recoveryHint: "resume failed transiently", + }); + } + return super.resume(handle); + } +} + +class DelayedSendPromptFakeSessionAdapter extends FakeSessionAdapter { + readonly promptStarted = deferred(); + readonly releasePrompt = deferred(); + + override async sendPrompt( + handle: SessionHandle, + envelope: PromptEnvelope, + ): Promise<{ promptId: string }> { + this.promptStarted.resolve(); + await this.releasePrompt.promise; + return super.sendPrompt(handle, envelope); + } +} + +function deferred() { + let resolve!: (value: T | PromiseLike) => void; + let reject!: (reason?: unknown) => void; + const promise = new Promise((promiseResolve, promiseReject) => { + resolve = promiseResolve; + reject = promiseReject; + }); + return { promise, reject, resolve }; +} + +function createGitRepo(): string { + const repoPath = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-repo-"))); + execFileSync("git", ["init", "-b", "main"], { cwd: repoPath, stdio: "ignore" }); + writeFileSync(join(repoPath, "README.md"), "# API fixture\n"); + execFileSync("git", ["add", "README.md"], { cwd: repoPath, stdio: "ignore" }); + execFileSync( + "git", + [ + "-c", + "user.name=Devflow 
Test", + "-c", + "user.email=devflow@example.test", + "commit", + "-m", + "initial", + ], + { cwd: repoPath, stdio: "ignore" }, + ); + return repoPath; +} + +async function seedDevelopmentRegistry(db: DbClient["db"]) { + const [templateEntry] = loadTemplateFiles(resolve("docs/schemas/templates")).filter( + (entry) => entry.name === "development" && entry.version === 1, + ); + if (templateEntry === undefined) { + throw new Error("development@1 template fixture is missing"); + } + await db + .insert(workflowTemplates) + .values({ + name: templateEntry.name, + version: templateEntry.version, + hash: templateEntry.hash, + definition: templateEntry.definition, + }) + .onConflictDoUpdate({ + target: [workflowTemplates.name, workflowTemplates.version], + set: { hash: templateEntry.hash, definition: templateEntry.definition }, + }); + + for (const personaEntry of loadPersonaFiles(resolve("docs/schemas/personas"))) { + await db + .insert(agentPersonas) + .values({ + name: personaEntry.name, + version: personaEntry.version, + hash: personaEntry.hash, + definition: personaEntry.definition, + }) + .onConflictDoNothing({ target: [agentPersonas.name, agentPersonas.version] }); + } +} + +async function waitForRunEventType(db: DbClient["db"], runId: string, type: string) { + const deadline = Date.now() + 2_000; + while (Date.now() < deadline) { + const [event] = await db + .select({ id: runEvents.id }) + .from(runEvents) + .where(and(eq(runEvents.runId, runId), eq(runEvents.type, type))) + .limit(1); + if (event !== undefined) { + return; + } + await new Promise((resolveWait) => setTimeout(resolveWait, 10)); + } + throw new Error(`timed out waiting for ${type}`); +} + +describe("startApi", () => { + let client: DbClient | undefined; + const runIds: string[] = []; + const templateIds: string[] = []; + const tempRoots: string[] = []; + + function createApiWorkspaceRoot(): string { + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-workspace-"))); + 
tempRoots.push(workspaceRoot); + return workspaceRoot; + } + + function startTestApi(options: Parameters[0] = {}) { + return startApi({ workspaceRoot: createApiWorkspaceRoot(), ...options }); + } + + afterEach(async () => { + if (client !== undefined) { + if (runIds.length > 0) { + const requests = await client.db + .select({ id: approvalRequests.id }) + .from(approvalRequests) + .where(inArray(approvalRequests.runId, [...runIds])); + if (requests.length > 0) { + await client.db.delete(approvalDecisions).where( + inArray( + approvalDecisions.approvalRequestId, + requests.map((request) => request.id), + ), + ); + } + await client.db + .delete(approvalRequests) + .where(inArray(approvalRequests.runId, [...runIds])); + await client.db.delete(runs).where(inArray(runs.id, [...runIds])); + } + if (templateIds.length > 0) { + await client.db + .delete(workflowTemplates) + .where(inArray(workflowTemplates.id, [...templateIds])); + } + await client.close(); + client = undefined; + } + + for (const root of tempRoots.splice(0)) { + rmSync(root, { recursive: true, force: true }); + } + runIds.length = 0; + templateIds.length = 0; + }); + + it("runs M4 restart recovery before startup completes", async () => { + client = createDbClient(databaseUrl); + const templateId = randomUUID(); + const runId = randomUUID(); + const sessionId = randomUUID(); + const repoPath = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-repo-"))); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-worktree-"))); + tempRoots.push(repoPath, worktreeRoot); + templateIds.push(templateId); + runIds.push(runId); + + await client.db.insert(workflowTemplates).values({ + id: templateId, + name: `api-startup-${templateId}`, + version: 1, + hash: "a".repeat(64), + definition: { name: "api-startup", version: 1, roles: [], phases: [], defaultGates: [] }, + }); + await client.db.insert(runs).values({ + id: runId, + templateId, + templateHash: "a".repeat(64), + state: "executing", + 
repoPath, + baseBranch: "main", + worktreeRoot, + }); + await client.db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "spec_writer", + backend: "fake", + cwd: worktreeRoot, + state: "READY", + }); + + const result = await startTestApi({ dbClient: client, recoveryRunIds: [runId] }); + try { + expect(result.recovery).toEqual({ + failedSessionIds: [sessionId], + sweptRunIds: [runId], + }); + expect(result.sessionRecovery).toEqual({ + failedSessionIds: [], + recoveredSessionIds: [], + }); + } finally { + await result.stop(); + } + const [run] = await client.db + .select({ state: runs.state }) + .from(runs) + .where(eq(runs.id, runId)); + expect(run).toEqual({ state: "failed" }); + const [session] = await client.db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session).toEqual({ state: "FAILED_NEEDS_HUMAN" }); + const events = await client.db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual(["run.failed", "session.failed"]); + }); + + it("holds the SessionManager singleton lock until stopped", async () => { + client = createDbClient(databaseUrl); + const recoveryRunIds = [randomUUID()]; + const first = await startTestApi({ dbClient: client, recoveryRunIds }); + try { + await expect(startTestApi({ dbClient: client, recoveryRunIds })).rejects.toMatchObject({ + code: "session_manager_already_running", + }); + } finally { + await first.stop(); + } + }); + + it("hosts the M4 run engine behind the API startup boundary", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = createApiWorkspaceRoot(); + const repoPath = createGitRepo(); + tempRoots.push(repoPath); + + const api = await startApi({ dbClient: client, workspaceRoot, recoveryRunIds: [] }); + try { + expect(api.engine).toBeInstanceOf(DbRunEngine); + 
const { runId } = await api.engine.startRun({ + requirementsMd: "Start a fake development run through the API-owned engine.", + repoPath, + baseBranch: "main", + scenarios: { spec: "ok" }, + }); + runIds.push(runId); + + const status = await api.engine.getStatus(runId); + expect(status.run.state).toBe("awaiting_approval"); + expect(status.run.worktreeRoot).toBe(resolve(workspaceRoot, runId, "main")); + expect(status.approvals).toMatchObject([{ gateKey: "spec_approved", state: "pending" }]); + } finally { + await api.stop(); + } + }); + + it("repairs missing terminal final reports during API startup", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = createApiWorkspaceRoot(); + const template = ( + await client.db + .select({ hash: workflowTemplates.hash, id: workflowTemplates.id }) + .from(workflowTemplates) + .where(eq(workflowTemplates.name, "development")) + .limit(1) + )[0]; + if (template === undefined) { + throw new Error("development template missing"); + } + const runId = randomUUID(); + const repoPath = createGitRepo(); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-worktree-"))); + tempRoots.push(repoPath, worktreeRoot); + runIds.push(runId); + await client.db.insert(runs).values({ + id: runId, + templateId: template.id, + templateHash: template.hash, + state: "completed", + repoPath, + baseBranch: "main", + worktreeRoot, + endedAt: new Date(), + finalReportPath: null, + }); + + const api = await startApi({ dbClient: client, workspaceRoot, recoveryRunIds: [runId] }); + try { + expect(api.finalReportRecovery).toEqual([runId]); + const [run] = await client.db + .select({ finalReportPath: runs.finalReportPath }) + .from(runs) + .where(eq(runs.id, runId)); + expect(run?.finalReportPath).toMatch(/\.report\.md$/); + } finally { + await api.stop(); + } + }); + + it("does not sweep active runs when a second API instance fails the singleton lock", async () => { + 
client = createDbClient(databaseUrl); + const first = await startTestApi({ dbClient: client, recoveryRunIds: [] }); + const templateId = randomUUID(); + const runId = randomUUID(); + const sessionId = randomUUID(); + const repoPath = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-repo-"))); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-worktree-"))); + tempRoots.push(repoPath, worktreeRoot); + templateIds.push(templateId); + runIds.push(runId); + try { + await client.db.insert(workflowTemplates).values({ + id: templateId, + name: `api-lock-order-${templateId}`, + version: 1, + hash: "c".repeat(64), + definition: { name: "api-lock-order", version: 1, roles: [], phases: [] }, + }); + await client.db.insert(runs).values({ + id: runId, + templateId, + templateHash: "c".repeat(64), + state: "executing", + repoPath, + baseBranch: "main", + worktreeRoot, + }); + await client.db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "spec_writer", + backend: "fake", + cwd: worktreeRoot, + state: "READY", + }); + + await expect( + startTestApi({ dbClient: client, recoveryRunIds: [runId] }), + ).rejects.toMatchObject({ + code: "session_manager_already_running", + }); + + const [run] = await client.db + .select({ state: runs.state }) + .from(runs) + .where(eq(runs.id, runId)); + expect(run).toEqual({ state: "executing" }); + const [session] = await client.db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session).toEqual({ state: "READY" }); + const events = await client.db.select().from(runEvents).where(eq(runEvents.runId, runId)); + expect(events).toEqual([]); + } finally { + await first.stop(); + } + }); + + it("ignores terminal-run sessions during SessionManager startup recovery", async () => { + client = createDbClient(databaseUrl); + const templateId = randomUUID(); + const runId = randomUUID(); + const sessionId = randomUUID(); + const repoPath = 
realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-repo-"))); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-worktree-"))); + tempRoots.push(repoPath, worktreeRoot); + templateIds.push(templateId); + runIds.push(runId); + const adapter = new FakeSessionAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }); + await adapter.start({ + runId, + roleId: "spec_writer", + backend: "fake", + cwd: worktreeRoot, + }); + + await client.db.insert(workflowTemplates).values({ + id: templateId, + name: `api-session-recovery-${templateId}`, + version: 1, + hash: "b".repeat(64), + definition: { name: "api-session-recovery", version: 1, roles: [], phases: [] }, + }); + await client.db.insert(runs).values({ + id: runId, + templateId, + templateHash: "b".repeat(64), + state: "completed", + repoPath, + baseBranch: "main", + worktreeRoot, + }); + await client.db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "spec_writer", + backend: "fake", + cwd: worktreeRoot, + state: "READY", + }); + + const result = await startTestApi({ + dbClient: client, + recoveryRunIds: [runId], + sessionAdapter: adapter, + }); + try { + expect(result.recovery).toEqual({ failedSessionIds: [], sweptRunIds: [] }); + expect(result.sessionRecovery).toEqual({ + failedSessionIds: [], + recoveredSessionIds: [], + }); + } finally { + await result.stop(); + } + const [session] = await client.db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session).toEqual({ state: "READY" }); + const approvals = await client.db + .select() + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approvals).toEqual([]); + const events = await client.db.select().from(runEvents).where(eq(runEvents.runId, runId)); + expect(events).toEqual([]); + }); + + it("retries transient session resume failures during startup recovery", async () => { + client = createDbClient(databaseUrl); + const 
templateId = randomUUID(); + const runId = randomUUID(); + const sessionId = randomUUID(); + const repoPath = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-repo-"))); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-worktree-"))); + tempRoots.push(repoPath, worktreeRoot); + templateIds.push(templateId); + runIds.push(runId); + const adapter = new ResumeSucceedsAfterTwoFailuresFakeSessionAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }); + await adapter.start({ + runId, + roleId: "spec_writer", + backend: "fake", + cwd: worktreeRoot, + }); + + await client.db.insert(workflowTemplates).values({ + id: templateId, + name: `api-session-retry-${templateId}`, + version: 1, + hash: "e".repeat(64), + definition: { name: "api-session-retry", version: 1, roles: [], phases: [] }, + }); + await client.db.insert(runs).values({ + id: runId, + templateId, + templateHash: "e".repeat(64), + state: "executing", + repoPath, + baseBranch: "main", + worktreeRoot, + }); + await client.db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "spec_writer", + backend: "fake", + cwd: worktreeRoot, + state: "READY", + }); + + const manager = new SessionManager({ + dbClient: client, + adapter, + recoveryRunIds: [runId], + }); + const recovery = await manager.initialize(); + try { + expect(adapter.resumeAttempts).toBe(3); + expect(recovery).toEqual({ failedSessionIds: [], recoveredSessionIds: [sessionId] }); + const approvals = await client.db + .select() + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approvals).toEqual([]); + } finally { + await manager.shutdown(); + } + }); + + it("pauses a non-terminal run when SessionManager startup recovery cannot resume a session", async () => { + client = createDbClient(databaseUrl); + const templateId = randomUUID(); + const runId = randomUUID(); + const sessionId = randomUUID(); + const repoPath = realpathSync(mkdtempSync(join(tmpdir(), 
"devflow-api-repo-"))); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-worktree-"))); + tempRoots.push(repoPath, worktreeRoot); + templateIds.push(templateId); + runIds.push(runId); + + await client.db.insert(workflowTemplates).values({ + id: templateId, + name: `api-session-recovery-failure-${templateId}`, + version: 1, + hash: "d".repeat(64), + definition: { name: "api-session-recovery-failure", version: 1, roles: [], phases: [] }, + }); + await client.db.insert(runs).values({ + id: runId, + templateId, + templateHash: "d".repeat(64), + state: "executing", + repoPath, + baseBranch: "main", + worktreeRoot, + }); + await client.db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "spec_writer", + backend: "fake", + cwd: worktreeRoot, + state: "READY", + }); + + const adapter = new ResumeFailsFakeSessionAdapter(); + const manager = new SessionManager({ + dbClient: client, + adapter, + recoveryRunIds: [runId], + }); + const recovery = await manager.initialize(); + try { + expect(adapter.resumeAttempts).toBe(3); + expect(recovery).toEqual({ failedSessionIds: [sessionId], recoveredSessionIds: [] }); + const [run] = await client.db + .select({ pausedFromState: runs.pausedFromState, state: runs.state }) + .from(runs) + .where(eq(runs.id, runId)); + expect(run).toEqual({ pausedFromState: "executing", state: "paused" }); + const [session] = await client.db + .select({ recoveryAttempts: tuiSessions.recoveryAttempts, state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session).toEqual({ recoveryAttempts: 1, state: "FAILED_NEEDS_HUMAN" }); + const [approval] = await client.db + .select({ + gateKey: approvalRequests.gateKey, + phaseId: approvalRequests.phaseId, + state: approvalRequests.state, + }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ + gateKey: "session_recovery_required", + phaseId: null, + state: "pending", + }); + const 
events = await client.db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual([ + "session.failed", + "run.paused", + "approval.requested", + ]); + } finally { + await manager.shutdown(); + } + }); + + it("keeps the singleton lock while shutdown drains in-flight session operations", async () => { + client = createDbClient(databaseUrl); + const adapter = new DelayedSendPromptFakeSessionAdapter({ writeDelayMs: 0 }); + const manager = new SessionManager({ + dbClient: client, + adapter, + recoveryRunIds: [], + shutdownDrainMs: 5_000, + }); + await manager.initialize(); + const runId = randomUUID(); + const cwd = realpathSync(mkdtempSync(join(tmpdir(), "devflow-api-session-"))); + tempRoots.push(cwd); + const handle = await manager.start({ + runId, + roleId: "spec_writer", + backend: "fake", + cwd, + }); + const envelope: PromptEnvelope = { + uuid: randomUUID(), + runId, + roleId: "spec_writer", + phaseKey: "spec", + attempt: 0, + expectedArtifact: join(tmpdir(), `${randomUUID()}.json`), + expectedSchema: "dev/spec@1", + dedupKey: `dedup-${randomUUID()}`, + instructions: "Scenario: timeout", + }; + const promptPromise = manager.sendPrompt(handle, envelope); + await adapter.promptStarted.promise; + + const shutdownPromise = manager.shutdown(); + await expect( + new SessionManager({ + dbClient: client, + adapter: new FakeSessionAdapter(), + recoveryRunIds: [], + }).initialize(), + ).rejects.toMatchObject({ code: "session_manager_already_running" }); + + adapter.releasePrompt.resolve(undefined); + await expect(promptPromise).resolves.toEqual({ promptId: envelope.dedupKey }); + await shutdownPromise; + + const nextManager = new SessionManager({ + dbClient: client, + adapter: new FakeSessionAdapter(), + recoveryRunIds: [], + }); + await expect(nextManager.initialize()).resolves.toEqual({ + failedSessionIds: [], + recoveredSessionIds: [], + }); + await 
nextManager.shutdown(); + }); + + it("keeps the singleton lock while shutdown drains in-flight artifact polling", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = createApiWorkspaceRoot(); + const repoPath = createGitRepo(); + tempRoots.push(repoPath); + const runId = randomUUID(); + runIds.push(runId); + const api = await startApi({ + dbClient: client, + workspaceRoot, + recoveryRunIds: [], + sessionAdapter: new FakeSessionAdapter({ writeDelayMs: 1_000 }), + }); + const startPromise = api.engine.startRun({ + runId, + requirementsMd: "Keep artifact polling in flight during shutdown.", + repoPath, + baseBranch: "main", + scenarios: { spec: "ok" }, + }); + await waitForRunEventType(client.db, runId, "artifact.expected"); + + const stopPromise = api.stop(); + await expect( + new SessionManager({ + dbClient: client, + adapter: new FakeSessionAdapter(), + recoveryRunIds: [], + }).initialize(), + ).rejects.toMatchObject({ code: "session_manager_already_running" }); + + await expect(startPromise).resolves.toEqual({ runId }); + await stopPromise; + const nextManager = new SessionManager({ + dbClient: client, + adapter: new FakeSessionAdapter(), + recoveryRunIds: [], + }); + await expect(nextManager.initialize()).resolves.toEqual({ + failedSessionIds: [], + recoveredSessionIds: [], + }); + await nextManager.shutdown(); + }); +}); diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts new file mode 100644 index 0000000..ae787ec --- /dev/null +++ b/apps/api/src/index.ts @@ -0,0 +1,128 @@ +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { type BackendConfig, getConfig } from "@devflow/core"; +import { DevflowError } from "@devflow/core"; +import { type DbClient, createDbClient } from "@devflow/db"; +import { DbRunEngine, type RunEngine } from "@devflow/run-engine"; +import { + FakeSessionAdapter, + type SessionAdapter, + SessionManager, + type 
SessionManagerRecoveryResult, +} from "@devflow/session"; + +import { recoverM4ApiStartup, startM4SessionManager } from "./startup.js"; + +export * from "./startup.js"; + +export interface StartApiOptions { + dbClient?: DbClient; + workspaceRoot?: string; + availableBackends?: readonly BackendConfig[]; + recoveryRunIds?: readonly string[]; + sessionAdapter?: SessionAdapter; + sessionManager?: SessionManager; + runEngine?: RunEngine; +} + +export interface StartApiResult { + recovery: Awaited>; + sessionRecovery: SessionManagerRecoveryResult; + sessionManager: SessionManager; + engine: RunEngine; + finalReportRecovery: string[]; + stop(): Promise; +} + +export async function startApi(options: StartApiOptions = {}): Promise { + const ownedClient = options.dbClient === undefined; + const config = ownedClient || options.workspaceRoot === undefined ? getConfig() : undefined; + const dbClient = + options.dbClient ?? createDbClient(config?.DATABASE_URL ?? getConfig().DATABASE_URL); + const sessionManager = + options.sessionManager ?? + new SessionManager({ + dbClient, + adapter: options.sessionAdapter ?? new FakeSessionAdapter(), + ...(options.recoveryRunIds === undefined ? {} : { recoveryRunIds: options.recoveryRunIds }), + }); + const engine = + options.runEngine ?? + new DbRunEngine({ + db: dbClient.db, + sessions: sessionManager, + workspaceRoot: options.workspaceRoot ?? config?.WORKSPACE_ROOT ?? getConfig().WORKSPACE_ROOT, + ...(options.availableBackends === undefined + ? config?.backends === undefined + ? {} + : { availableBackends: config.backends } + : { availableBackends: options.availableBackends }), + }); + + try { + await sessionManager.acquireLock(); + const recovery = await recoverM4ApiStartup( + dbClient.db, + options.recoveryRunIds === undefined ? {} : { runIds: options.recoveryRunIds }, + ); + const sessionRecovery = await startM4SessionManager(sessionManager); + const finalReportRecovery = + engine instanceof DbRunEngine + ? 
await engine.recoverMissingFinalReports( + options.recoveryRunIds === undefined ? {} : { runIds: options.recoveryRunIds }, + ) + : []; + return { + engine, + finalReportRecovery, + recovery, + sessionRecovery, + sessionManager, + async stop() { + await sessionManager.shutdown(); + if (ownedClient) { + await dbClient.close(); + } + }, + }; + } catch (error) { + if (options.sessionManager === undefined) { + await sessionManager.shutdown().catch(() => undefined); + } + if (ownedClient) { + await dbClient.close(); + } + throw error; + } +} + +if (isDirectEntry(import.meta.url, process.argv)) { + startApi() + .then(async (api) => { + await waitForShutdownSignal(); + await api.stop(); + }) + .catch((error: unknown) => { + console.error(error); + process.exitCode = + error instanceof DevflowError && error.code === "session_manager_already_running" ? 3 : 2; + }); +} + +function isDirectEntry(importMetaUrl: string, argv: readonly string[]): boolean { + const entry = argv[1]; + return entry !== undefined && resolve(entry) === fileURLToPath(importMetaUrl); +} + +function waitForShutdownSignal(): Promise { + return new Promise((resolveSignal) => { + const resolveOnce = () => { + process.off("SIGINT", resolveOnce); + process.off("SIGTERM", resolveOnce); + resolveSignal(); + }; + process.once("SIGINT", resolveOnce); + process.once("SIGTERM", resolveOnce); + }); +} diff --git a/apps/api/src/startup.ts b/apps/api/src/startup.ts new file mode 100644 index 0000000..db159b3 --- /dev/null +++ b/apps/api/src/startup.ts @@ -0,0 +1,14 @@ +import type { DbClient } from "@devflow/db"; +import { type M4ProcessRestartSweepOptions, sweepM4ProcessRestart } from "@devflow/run-engine"; +import type { SessionManager } from "@devflow/session"; + +export async function recoverM4ApiStartup( + db: DbClient["db"], + options: M4ProcessRestartSweepOptions = {}, +) { + return sweepM4ProcessRestart(db, options); +} + +export async function startM4SessionManager(sessionManager: SessionManager) { + return 
sessionManager.recoverSessions(); +} diff --git a/apps/api/tsconfig.json b/apps/api/tsconfig.json new file mode 100644 index 0000000..3f2aeb1 --- /dev/null +++ b/apps/api/tsconfig.json @@ -0,0 +1,15 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "rootDir": "src", + "outDir": "dist", + "types": ["node", "vitest"] + }, + "include": ["src/**/*.ts"], + "references": [ + { "path": "../../packages/core" }, + { "path": "../../packages/db" }, + { "path": "../../packages/run-engine" }, + { "path": "../../packages/session" } + ] +} diff --git a/docs/schemas/personas/fake-devflow-agent@1.yaml b/docs/schemas/personas/fake-devflow-agent@1.yaml new file mode 100644 index 0000000..b7e0225 --- /dev/null +++ b/docs/schemas/personas/fake-devflow-agent@1.yaml @@ -0,0 +1,15 @@ +name: fake-devflow-agent +version: 1 +backend: fake +capabilities: + - spec_write + - phase_planning + - task_dag_planning + - code_edit + - test_first_development + - command_execute + - final_report_compose +maxRiskLevel: high +promptConfig: + instructionsPrelude: "Use the fake backend fixture protocol." 
+modelConfig: {} diff --git a/docs/schemas/templates/development@1.yaml b/docs/schemas/templates/development@1.yaml new file mode 100644 index 0000000..b57353e --- /dev/null +++ b/docs/schemas/templates/development@1.yaml @@ -0,0 +1,35 @@ +name: development +version: 1 +roles: + - id: spec_writer + requiredCapabilities: + - spec_write + preferredBackends: + - fake + - id: phase_planner + requiredCapabilities: + - phase_planning + preferredBackends: + - fake +phases: + - key: spec + title: Development Specification + risk: low + roles: + - spec_writer + expectedArtifact: + path: artifacts/spec.json + schema: dev/spec@1 + gates: + - spec_approved + - key: phase_plan + title: Phase Plan + risk: low + roles: + - phase_planner + expectedArtifact: + path: artifacts/phase-plan.json + schema: dev/phase-plan@1 + gates: + - phase_plan_approved +defaultGates: [] diff --git a/packages/run-engine/src/engine.test.ts b/packages/run-engine/src/engine.test.ts new file mode 100644 index 0000000..80d9269 --- /dev/null +++ b/packages/run-engine/src/engine.test.ts @@ -0,0 +1,1098 @@ +import { execFileSync } from "node:child_process"; +import { randomUUID } from "node:crypto"; +import { + existsSync, + mkdtempSync, + readFileSync, + realpathSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; + +import { and, eq, inArray } from "drizzle-orm"; +import { afterEach, describe, expect, it } from "vitest"; + +import { loadPersonaFiles, loadTemplateFiles, validateArtifact } from "@devflow/core"; +import { + type DbClient, + agentPersonas, + approvalDecisions, + approvalRequests, + artifacts, + createDbClient, + runBindings, + runEvents, + runPhases, + runs, + tuiSessions, + workflowTemplates, +} from "@devflow/db"; +import { FakeSessionAdapter, type SessionAdapter, SessionManager } from "@devflow/session"; + +import { DbRunEngine, sweepM4ProcessRestart } from "./engine.js"; + +const databaseUrl = + 
process.env.DATABASE_URL ?? "postgres://devflow:devflow@127.0.0.1:55432/devflow"; +/** Wraps a database handle and session adapter in the SessionManager passed to the engine. */ +function sessionRuntime(db: DbClient["db"], adapter: SessionAdapter) { + return new SessionManager({ db, adapter }); +} +/** Creates a temp git repo on branch "main" with one committed README.md; returns its real path. */ +function createGitRepo(): string { + const repoPath = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-repo-"))); + execFileSync("git", ["init", "-b", "main"], { cwd: repoPath, stdio: "ignore" }); + writeFileSync(join(repoPath, "README.md"), "# Engine fixture\n"); + execFileSync("git", ["add", "README.md"], { cwd: repoPath, stdio: "ignore" }); + execFileSync( + "git", + [ + "-c", + "user.name=Devflow Test", + "-c", + "user.email=devflow@example.test", + "commit", + "-m", + "initial", + ], + { cwd: repoPath, stdio: "ignore" }, + ); + return repoPath; +} +/** Fake adapter that flips the run to "paused" in the database once, after the first prompt send. */ +class PausesAfterPromptAcceptedFakeAdapter extends FakeSessionAdapter { + private paused = false; + /** Stores the db handle used for the pause write; the base fake gets writeDelayMs 0. */ + constructor(private readonly db: DbClient["db"]) { + super({ writeDelayMs: 0 }); + } + /** Forwards to the base sendPrompt (whose parameter types it reuses), then pauses once. */ + override async sendPrompt( + handle: Parameters<FakeSessionAdapter["sendPrompt"]>[0], + envelope: Parameters<FakeSessionAdapter["sendPrompt"]>[1], + ): Promise<{ promptId: string }> { + const result = await super.sendPrompt(handle, envelope); + if (!this.paused) { + this.paused = true; + await this.db + .update(runs) + .set({ state: "paused", pausedFromState: "executing", updatedAt: new Date() }) + .where(eq(runs.id, envelope.runId)); + } + return result; + } +} + +describe("DbRunEngine", () => { + let client: DbClient | undefined; + const runIds: string[] = []; + const tempRoots: string[] = []; + + afterEach(async () => { + if (client !== undefined) { + if (runIds.length > 0) { + const requests = await client.db + .select({ id: approvalRequests.id }) + .from(approvalRequests) + .where(inArray(approvalRequests.runId, [...runIds])); + if (requests.length > 0) { + await client.db.delete(approvalDecisions).where( + inArray( + approvalDecisions.approvalRequestId, + requests.map((request) => request.id), + ), + ); + } + await client.db + .delete(approvalRequests) +
.where(inArray(approvalRequests.runId, [...runIds])); + await client.db.delete(runs).where(inArray(runs.id, [...runIds])); + } + await client.close(); + client = undefined; + } + + for (const root of tempRoots.splice(0)) { + rmSync(root, { recursive: true, force: true }); + } + runIds.length = 0; + }); + + it("runs development@1 through a spec approval gate and writes final reports", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Build the requested change using the fake development workflow.", + repoPath, + baseBranch: "main", + scenarios: { + spec: "ok", + phase_plan: "ok", + }, + }); + runIds.push(runId); + + const awaiting = await engine.getStatus(runId); + expect(awaiting.run.state).toBe("awaiting_approval"); + expect(awaiting.run.repoPath).toBe(repoPath); + expect(awaiting.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([ + ["spec", "awaiting_approval"], + ["phase_plan", "pending"], + ]); + expect(awaiting.approvals).toMatchObject([ + { + gateKey: "spec_approved", + state: "pending", + }, + ]); + + const specArtifacts = await client.db + .select({ schemaId: artifacts.schemaId, valid: artifacts.valid }) + .from(artifacts) + .where(eq(artifacts.runId, runId)); + expect(specArtifacts).toEqual([{ schemaId: "dev/spec@1", valid: true }]); + + const approvalId = awaiting.approvals[0]?.id; + expect(approvalId).toBeDefined(); + if (approvalId === undefined) { + throw new Error("approval id missing"); + } + const clientToken = randomUUID(); + await 
engine.signalApproval(runId, approvalId, "approve", clientToken, "approved"); + await engine.signalApproval(runId, approvalId, "approve", clientToken, "approved"); + + const awaitingPlanApproval = await engine.getStatus(runId); + expect(awaitingPlanApproval.run.state).toBe("awaiting_approval"); + expect(awaitingPlanApproval.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([ + ["spec", "completed"], + ["phase_plan", "awaiting_approval"], + ]); + const phasePlanApproval = pendingApproval(awaitingPlanApproval, "phase_plan_approved"); + const planClientToken = randomUUID(); + await engine.signalApproval( + runId, + phasePlanApproval.id, + "approve", + planClientToken, + "plan approved", + ); + + const completed = await engine.getStatus(runId); + expect(completed.run.state).toBe("completed"); + expect(completed.run.finalReportPath).toMatch(/\.report\.md$/); + expect(completed.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([ + ["spec", "completed"], + ["phase_plan", "completed"], + ["implement", "completed"], + ]); + expect(completed.approvals).toMatchObject([ + { + gateKey: "spec_approved", + state: "approved", + }, + { + gateKey: "phase_plan_approved", + state: "approved", + }, + ]); + + const [bindingCount, eventRows, finalRun] = await Promise.all([ + client.db.select().from(runBindings).where(eq(runBindings.runId, runId)), + client.db + .select({ seq: runEvents.seq, type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq), + client.db + .select({ finalReportPath: runs.finalReportPath, worktreeRoot: runs.worktreeRoot }) + .from(runs) + .where(eq(runs.id, runId)) + .limit(1), + ]); + expect(bindingCount.map((binding) => binding.roleId).sort()).toEqual([ + "phase_planner", + "spec_writer", + ]); + expect(eventRows.map((event) => event.seq)).toEqual( + eventRows.map((_, index) => BigInt(index + 1)), + ); + expect(eventRows.map((event) => event.type)).toContain("approval.resolved"); + 
expect(eventRows.filter((event) => event.type === "session.idle")).toHaveLength(3); + expect(eventRows.at(-1)?.type).toBe("run.completed"); + + const finalReportPath = finalRun[0]?.finalReportPath; + expect(finalReportPath).toBe(completed.run.finalReportPath); + expect(finalReportPath).not.toBeNull(); + if (finalReportPath === null || finalReportPath === undefined) { + throw new Error("final report path missing"); + } + expect(existsSync(finalReportPath)).toBe(true); + const reportJsonPath = finalReportPath.replace(/\.report\.md$/, ".report.json"); + const report = JSON.parse(readFileSync(reportJsonPath, "utf8")) as unknown; + expect(validateArtifact("common/final-report@1", report)).toEqual({ ok: true }); + expect(report).toMatchObject({ + runId, + status: "completed", + approvals: [ + { gateKey: "spec_approved", state: "approved" }, + { gateKey: "phase_plan_approved", state: "approved" }, + ], + unresolved: [], + }); + expect(finalRun[0]?.worktreeRoot).toBe(resolve(workspaceRoot, runId, "main")); + expect(readFileSync(join(finalRun[0]?.worktreeRoot ?? "", "README.md"), "utf8")).toContain( + "Engine fixture", + ); + + await client.db + .update(runs) + .set({ finalReportPath: null, updatedAt: new Date() }) + .where(eq(runs.id, runId)); + await engine.signalApproval( + runId, + phasePlanApproval.id, + "approve", + planClientToken, + "plan approved", + ); + const repairedReport = await engine.getStatus(runId); + expect(repairedReport.run.finalReportPath).toMatch(/\.report\.md$/); + expect(existsSync(repairedReport.run.finalReportPath ?? 
"")).toBe(true); + }); + + it("increments attempts before emitting skipped planned phases", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Plan a phase with no artifact so it is skipped.", + repoPath, + baseBranch: "main", + scenarios: { + phase_plan: "skip-only", + }, + }); + runIds.push(runId); + + const specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved"); + await engine.signalApproval(runId, specApproval.id, "approve", randomUUID()); + const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved"); + await engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID()); + + const phases = await client.db + .select({ + attempts: runPhases.attempts, + phaseKey: runPhases.phaseKey, + state: runPhases.state, + }) + .from(runPhases) + .where(eq(runPhases.runId, runId)); + expect(phases.find((phase) => phase.phaseKey === "documentation-note")).toMatchObject({ + attempts: 1, + state: "skipped", + }); + + const [skipEvent] = await client.db + .select({ idempotencyKey: runEvents.idempotencyKey, payload: runEvents.payload }) + .from(runEvents) + .where(and(eq(runEvents.runId, runId), eq(runEvents.type, "phase.skipped"))) + .limit(1); + expect(skipEvent?.idempotencyKey).toMatch(/:1$/); + expect(skipEvent?.payload).toMatchObject({ + attempt: 1, + phaseKey: "documentation-note", + }); + }); + + it("rejects duplicate active runs for the same canonical repo and base branch", async () => { + client = 
createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + maxConcurrentRuns: 100, + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "First active run.", + repoPath, + baseBranch: "main", + }); + runIds.push(runId); + + await expect( + engine.startRun({ + requirementsMd: "Second active run for the same repo.", + repoPath, + baseBranch: "main", + }), + ).rejects.toMatchObject({ + code: "active_run_exists", + recoveryHint: JSON.stringify({ currentRunId: runId, currentState: "awaiting_approval" }), + }); + }); + + it("enforces the configured maximum concurrent active runs", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const candidateRepoPath = createGitRepo(); + tempRoots.push(workspaceRoot, candidateRepoPath); + const [template] = await client.db + .select({ hash: workflowTemplates.hash, id: workflowTemplates.id }) + .from(workflowTemplates) + .where(and(eq(workflowTemplates.name, "development"), eq(workflowTemplates.version, 1))) + .limit(1); + if (template === undefined) { + throw new Error("development@1 template missing"); + } + for (let index = 0; index < 4; index += 1) { + const activeRunId = randomUUID(); + const repoPath = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-active-repo-"))); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-engine-active-worktree-")), + ); + tempRoots.push(repoPath, worktreeRoot); + runIds.push(activeRunId); + await 
client.db.insert(runs).values({ + id: activeRunId, + templateId: template.id, + templateHash: template.hash, + state: "executing", + repoPath, + baseBranch: `branch-${index}`, + worktreeRoot, + }); + } + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + maxConcurrentRuns: 4, + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + await expect( + engine.startRun({ + requirementsMd: "This run should exceed the concurrency limit.", + repoPath: candidateRepoPath, + baseBranch: "main", + }), + ).rejects.toMatchObject({ + code: "max_concurrent_runs", + }); + }); + + it("pauses and resumes an approval wait without resolving the gate", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Pause while waiting for approval.", + repoPath, + baseBranch: "main", + }); + runIds.push(runId); + + await engine.pauseRun(runId); + expect((await engine.getStatus(runId)).run.state).toBe("paused"); + + const [pendingApproval] = await client.db + .select({ id: approvalRequests.id, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(pendingApproval).toMatchObject({ state: "pending" }); + + if (pendingApproval === undefined) { + throw new Error("pending approval missing"); + } + await expect( + engine.signalApproval(runId, pendingApproval.id, "approve", randomUUID()), + ).rejects.toMatchObject({ + code: 
"approval_conflict", + }); + await expect( + engine.signalApproval(runId, pendingApproval.id, "reject", randomUUID()), + ).rejects.toMatchObject({ + code: "approval_conflict", + }); + expect((await engine.getStatus(runId)).run.state).toBe("paused"); + + await engine.resumeRun(runId); + expect((await engine.getStatus(runId)).run.state).toBe("awaiting_approval"); + }); + + it("resumes an active phase that observed a manual pause mid-mutation", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new PausesAfterPromptAcceptedFakeAdapter(client.db)), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Pause during prompt send and then resume the active phase.", + repoPath, + baseBranch: "main", + }); + runIds.push(runId); + + const paused = await engine.getStatus(runId); + expect(paused.run.state).toBe("paused"); + expect(paused.phases.find((phase) => phase.phaseKey === "spec")).toMatchObject({ + attempts: 1, + state: "running", + }); + + await engine.resumeRun(runId); + + const resumed = await engine.getStatus(runId); + expect(resumed.run.state).toBe("awaiting_approval"); + expect(resumed.phases.find((phase) => phase.phaseKey === "spec")).toMatchObject({ + attempts: 1, + state: "awaiting_approval", + }); + expect(pendingApproval(resumed, "spec_approved")).toBeDefined(); + }); + + it("preserves a human-required artifact gate raised during startRun", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = 
createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Keep the run paused when artifact repair is exhausted.", + repoPath, + baseBranch: "main", + scenarios: { + spec: { scenario: "invalid", repairScenario: "invalid" }, + }, + }); + runIds.push(runId); + + const status = await engine.getStatus(runId); + expect(status.run.state).toBe("paused"); + expect(status.run.finalReportPath).toBeNull(); + expect(status.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([ + ["spec", "failed"], + ["phase_plan", "pending"], + ]); + expect(status.approvals).toMatchObject([ + { + gateKey: "artifact_invalid_after_repair", + state: "pending", + }, + ]); + + await expect(engine.resumeRun(runId)).rejects.toMatchObject({ + code: "approval_conflict", + }); + const approvalId = status.approvals[0]?.id; + expect(approvalId).toBeDefined(); + if (approvalId === undefined) { + throw new Error("approval id missing"); + } + await expect( + engine.signalApproval(runId, approvalId, "approve", randomUUID()), + ).rejects.toMatchObject({ + code: "approval_conflict", + }); + await expect( + engine.signalApproval(runId, approvalId, "request_changes", randomUUID()), + ).rejects.toMatchObject({ + code: "approval_conflict", + }); + expect((await engine.getStatus(runId)).run.state).toBe("paused"); + + await engine.signalApproval(runId, approvalId, "reject", randomUUID()); + const rejected = await engine.getStatus(runId); + expect(rejected.run.state).toBe("failed"); + expect(rejected.run.finalReportPath).toMatch(/\.report\.md$/); + }); + + it("fails terminally with a report when approval-triggered advancement hits a fatal plan error", async () => { + client = createDbClient(databaseUrl); + await 
seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Fail when the phase plan has an unbound role.", + repoPath, + baseBranch: "main", + scenarios: { + phase_plan: "unbound-role", + }, + }); + runIds.push(runId); + const specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved"); + await engine.signalApproval(runId, specApproval.id, "approve", randomUUID()); + const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved"); + + await expect( + engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID()), + ).rejects.toMatchObject({ + code: "internal_state_corruption", + }); + + const failed = await engine.getStatus(runId); + expect(failed.run.state).toBe("failed"); + expect(failed.run.finalReportPath).toMatch(/\.report\.md$/); + expect(failed.phases.map((phase) => phase.phaseKey)).toEqual(["spec", "phase_plan"]); + if (failed.run.finalReportPath === null) { + throw new Error("final report missing"); + } + expect(existsSync(failed.run.finalReportPath)).toBe(true); + const failedSessions = await client.db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.runId, runId)); + expect(failedSessions.every((session) => session.state === "FAILED_NEEDS_HUMAN")).toBe(true); + }); + + it("fails planned phase key collisions instead of silently skipping work", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), 
"devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Fail when planned phase keys collide with template phase keys.", + repoPath, + baseBranch: "main", + scenarios: { + phase_plan: "colliding-spec", + }, + }); + runIds.push(runId); + const specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved"); + await engine.signalApproval(runId, specApproval.id, "approve", randomUUID()); + const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved"); + + await expect( + engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID()), + ).rejects.toMatchObject({ + code: "internal_state_corruption", + }); + const failed = await engine.getStatus(runId); + expect(failed.run.state).toBe("failed"); + expect(failed.phases.map((phase) => phase.phaseKey)).toEqual(["spec", "phase_plan"]); + }); + + it("validates every planned phase role before inserting dynamic phases", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Fail when any planned role is unbound.", + repoPath, + baseBranch: "main", + scenarios: { + phase_plan: "mixed-unbound-role", + }, + }); + runIds.push(runId); + const 
specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved"); + await engine.signalApproval(runId, specApproval.id, "approve", randomUUID()); + const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved"); + + await expect( + engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID()), + ).rejects.toMatchObject({ + code: "internal_state_corruption", + }); + const failed = await engine.getStatus(runId); + expect(failed.run.state).toBe("failed"); + expect(failed.phases.map((phase) => phase.phaseKey)).toEqual(["spec", "phase_plan"]); + }); + + it("marks every session failed when a later planned phase fails fatally", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Fatal planned phase failures should clean every session.", + repoPath, + baseBranch: "main", + scenarios: { + phase_plan: "unknown-schema", + }, + }); + runIds.push(runId); + const specApproval = pendingApproval(await engine.getStatus(runId), "spec_approved"); + await engine.signalApproval(runId, specApproval.id, "approve", randomUUID()); + const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved"); + + await expect( + engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID()), + ).rejects.toMatchObject({ + code: "fake_fixture_missing", + }); + const failed = await engine.getStatus(runId); + expect(failed.run.state).toBe("failed"); + 
expect(failed.run.finalReportPath).toMatch(/\.report\.md$/); + if (failed.run.finalReportPath === null) { + throw new Error("expected failed run to write a final report"); + } + expect(existsSync(failed.run.finalReportPath)).toBe(true); + const finalReport = JSON.parse( + readFileSync(failed.run.finalReportPath.replace(/\.report\.md$/, ".report.json"), "utf8"), + ) as { status?: unknown }; + expect(finalReport.status).toBe("failed"); + const sessions = await client.db + .select({ roleId: tuiSessions.roleId, state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.runId, runId)); + expect(sessions.length).toBeGreaterThanOrEqual(2); + expect(sessions.every((session) => session.state === "FAILED_NEEDS_HUMAN")).toBe(true); + }); + + it("does not start another pending phase when approval replay sees active work", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Approval replay must not run phases concurrently.", + repoPath, + baseBranch: "main", + }); + runIds.push(runId); + const status = await engine.getStatus(runId); + const specApproval = pendingApproval(status, "spec_approved"); + const token = randomUUID(); + await client.db.insert(approvalDecisions).values({ + approvalRequestId: specApproval.id, + action: "approve", + idempotencyKey: `${specApproval.id}:approve:${token}`, + }); + await client.db + .update(approvalRequests) + .set({ state: "approved", resolvedAt: new Date() }) + .where(eq(approvalRequests.id, specApproval.id)); + const activePhaseId 
= randomUUID(); + const pendingPhaseId = randomUUID(); + await client.db.insert(runPhases).values([ + { + id: activePhaseId, + runId, + phaseKey: "implement-a", + seq: 3, + state: "running", + attempts: 1, + }, + { + id: pendingPhaseId, + runId, + phaseKey: "implement-b", + seq: 4, + state: "pending", + }, + ]); + await client.db + .update(runs) + .set({ state: "executing", currentPhaseId: activePhaseId }) + .where(eq(runs.id, runId)); + await client.db + .update(runPhases) + .set({ state: "completed", endedAt: new Date() }) + .where(and(eq(runPhases.runId, runId), eq(runPhases.phaseKey, "spec"))); + + await engine.signalApproval(runId, specApproval.id, "approve", token); + + const phases = await client.db + .select({ + phaseKey: runPhases.phaseKey, + state: runPhases.state, + attempts: runPhases.attempts, + }) + .from(runPhases) + .where(eq(runPhases.runId, runId)); + expect(phases.find((phase) => phase.phaseKey === "implement-a")).toMatchObject({ + state: "running", + attempts: 1, + }); + expect(phases.find((phase) => phase.phaseKey === "implement-b")).toMatchObject({ + state: "pending", + attempts: 0, + }); + }); + + it("reruns the same phase when approval requests changes", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Request changes to the spec before approving it.", + repoPath, + baseBranch: "main", + }); + runIds.push(runId); + const firstApproval = (await engine.getStatus(runId)).approvals[0]; + expect(firstApproval).toBeDefined(); + if (firstApproval === 
undefined) { + throw new Error("first approval missing"); + } + + await engine.signalApproval(runId, firstApproval.id, "request_changes", randomUUID()); + const changed = await engine.getStatus(runId); + expect(changed.run.state).toBe("awaiting_approval"); + expect(changed.phases.map((phase) => [phase.phaseKey, phase.state, phase.attempts])).toEqual([ + ["spec", "awaiting_approval", 2], + ["phase_plan", "pending", 0], + ]); + expect(changed.approvals.map((approval) => [approval.gateKey, approval.state])).toEqual([ + ["spec_approved", "changes_requested"], + ["spec_approved", "pending"], + ]); + + const secondApproval = changed.approvals.find((approval) => approval.state === "pending"); + expect(secondApproval).toBeDefined(); + if (secondApproval === undefined) { + throw new Error("second approval missing"); + } + await engine.signalApproval(runId, secondApproval.id, "approve", randomUUID()); + + const phasePlanApproval = pendingApproval(await engine.getStatus(runId), "phase_plan_approved"); + await engine.signalApproval(runId, phasePlanApproval.id, "approve", randomUUID()); + + const completed = await engine.getStatus(runId); + expect(completed.run.state).toBe("completed"); + expect(completed.phases.map((phase) => [phase.phaseKey, phase.state, phase.attempts])).toEqual([ + ["spec", "completed", 2], + ["phase_plan", "completed", 1], + ["implement", "completed", 1], + ]); + }); + + it("aborts pending approvals and sessions, and stale approvals cannot resume the run", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId 
} = await engine.startRun({ + requirementsMd: "Abort while waiting for approval.", + repoPath, + baseBranch: "main", + }); + runIds.push(runId); + const pendingApproval = (await engine.getStatus(runId)).approvals[0]; + expect(pendingApproval).toBeDefined(); + if (pendingApproval === undefined) { + throw new Error("pending approval missing"); + } + + await engine.pauseRun(runId); + expect((await engine.getStatus(runId)).run.state).toBe("paused"); + + await engine.abortRun(runId, "user requested abort"); + const aborted = await engine.getStatus(runId); + expect(aborted.run.state).toBe("aborted"); + expect(aborted.approvals).toMatchObject([{ state: "aborted" }]); + const abortEvents = await client.db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(abortEvents.map((event) => event.type)).not.toContain("approval.resolved"); + const abortDecisions = await client.db + .select() + .from(approvalDecisions) + .where(eq(approvalDecisions.approvalRequestId, pendingApproval.id)); + expect(abortDecisions).toEqual([]); + expect(aborted.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([ + ["spec", "failed"], + ["phase_plan", "pending"], + ]); + const sessions = await client.db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.runId, runId)); + expect(sessions).toEqual([{ state: "FAILED_NEEDS_HUMAN" }]); + const [abortedRow] = await client.db + .select({ pausedFromState: runs.pausedFromState }) + .from(runs) + .where(eq(runs.id, runId)); + expect(abortedRow?.pausedFromState).toBeNull(); + + await expect( + engine.signalApproval(runId, pendingApproval.id, "approve", randomUUID()), + ).rejects.toMatchObject({ + code: "approval_conflict", + }); + expect((await engine.getStatus(runId)).run.state).toBe("aborted"); + }); + + it("sweeps non-terminal M4 runs on API startup recovery", async () => { + client = createDbClient(databaseUrl); + await 
seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Leave a non-terminal run for restart recovery.", + repoPath, + baseBranch: "main", + }); + runIds.push(runId); + + const swept = await sweepM4ProcessRestart(client.db, { runIds: [runId] }); + expect(swept.sweptRunIds).toContain(runId); + + const recovered = await engine.getStatus(runId); + expect(recovered.run.state).toBe("failed"); + expect(recovered.run.finalReportPath).toBeNull(); + expect(recovered.run.currentPhaseId).toBeNull(); + expect(recovered.approvals).toMatchObject([{ state: "aborted" }]); + expect(recovered.eventsTail.map((event) => event.type)).not.toContain("approval.resolved"); + const recoveredDecisions = await client.db + .select() + .from(approvalDecisions) + .where(eq(approvalDecisions.approvalRequestId, recovered.approvals[0]?.id ?? 
"")); + expect(recoveredDecisions).toEqual([]); + expect(recovered.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([ + ["spec", "failed"], + ["phase_plan", "pending"], + ]); + const sessions = await client.db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.runId, runId)); + expect(sessions).toEqual([{ state: "FAILED_NEEDS_HUMAN" }]); + expect(recovered.eventsTail.map((event) => event.type)).toContain("run.failed"); + }); + + it("records planning as the paused-from state when phase planning needs human recovery", async () => { + client = createDbClient(databaseUrl); + await seedDevelopmentRegistry(client.db); + const workspaceRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-engine-workspace-"))); + const repoPath = createGitRepo(); + tempRoots.push(workspaceRoot, repoPath); + const engine = new DbRunEngine({ + db: client.db, + sessions: sessionRuntime(client.db, new FakeSessionAdapter({ writeDelayMs: 0 })), + workspaceRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const { runId } = await engine.startRun({ + requirementsMd: "Phase planning should use the planning run state.", + repoPath, + baseBranch: "main", + scenarios: { + phase_plan: { scenario: "invalid", repairScenario: "invalid" }, + }, + }); + runIds.push(runId); + const approval = (await engine.getStatus(runId)).approvals[0]; + expect(approval).toBeDefined(); + if (approval === undefined) { + throw new Error("approval missing"); + } + + await engine.signalApproval(runId, approval.id, "approve", randomUUID()); + + const [run] = await client.db + .select({ state: runs.state, pausedFromState: runs.pausedFromState }) + .from(runs) + .where(eq(runs.id, runId)); + expect(run).toMatchObject({ state: "paused", pausedFromState: "planning" }); + const status = await engine.getStatus(runId); + expect(status.phases.map((phase) => [phase.phaseKey, phase.state])).toEqual([ + ["spec", "completed"], + ["phase_plan", "failed"], + ]); + }); 
/**
 * Seeds the registry tables the engine tests depend on.
 *
 * The `development@1` template fixture found under `docs/schemas/templates` is
 * upserted (hash and definition are refreshed when the name/version row already
 * exists). Every persona fixture under `docs/schemas/personas` is inserted only
 * when its name/version pair is not present yet.
 *
 * @param db - Drizzle database handle to seed.
 * @throws Error when no `development` version 1 template fixture is loaded.
 */
async function seedDevelopmentRegistry(db: DbClient["db"]) {
  const development = loadTemplateFiles(resolve("docs/schemas/templates")).find(
    (candidate) => candidate.name === "development" && candidate.version === 1,
  );
  if (development === undefined) {
    throw new Error("development@1 template fixture is missing");
  }

  const { hash: templateHash, definition: templateDefinition } = development;
  await db
    .insert(workflowTemplates)
    .values({
      name: development.name,
      version: development.version,
      hash: templateHash,
      definition: templateDefinition,
    })
    .onConflictDoUpdate({
      target: [workflowTemplates.name, workflowTemplates.version],
      set: { hash: templateHash, definition: templateDefinition },
    });

  // Personas are written one at a time, in fixture order; existing rows are left as they are.
  const personaFixtures = loadPersonaFiles(resolve("docs/schemas/personas"));
  for (const { name, version, hash: personaHash, definition } of personaFixtures) {
    await db
      .insert(agentPersonas)
      .values({ name, version, hash: personaHash, definition })
      .onConflictDoNothing({ target: [agentPersonas.name, agentPersonas.version] });
  }
}
/**
 * Control surface for workflow runs: start a run, answer its approval gates,
 * pause/resume/abort it, and read a status snapshot.
 *
 * Every member is asynchronous. Members that only signal a state change
 * resolve with no value.
 */
export interface RunEngine {
  /**
   * Creates a run from the given input.
   *
   * @returns The id of the run (the caller-supplied `runId` when one was given).
   */
  startRun(input: RunStartInput): Promise<{ runId: string }>;

  /**
   * Records a decision for one approval request of a run.
   *
   * @param runId - Run that owns the approval request.
   * @param approvalRequestId - Approval request being answered.
   * @param action - Decision to record.
   * @param clientToken - Caller-chosen token identifying this decision attempt.
   * @param comment - Optional free-text comment stored with the decision.
   */
  signalApproval(
    runId: string,
    approvalRequestId: string,
    action: ApprovalDecisionActionValue,
    clientToken: string,
    comment?: string,
  ): Promise<void>;

  /** Requests that the run be paused. */
  pauseRun(runId: string): Promise<void>;

  /** Requests that a paused run continue. */
  resumeRun(runId: string): Promise<void>;

  /**
   * Aborts the run.
   *
   * @param reason - Human-readable reason recorded with the abort.
   */
  abortRun(runId: string, reason: string): Promise<void>;

  /** Returns a snapshot of the run, its phases, its approvals and its most recent events. */
  getStatus(runId: string): Promise<RunStatus>;
}
/**
 * Creates a run, provisions its git worktree, locks persona bindings and
 * drives the run forward until it stops advancing.
 *
 * Defaults: `runId` is a fresh UUID, the template is `development` version 1.
 *
 * Failure handling is split in two stages:
 * - while inserting the run / creating the worktree, a violation of the
 *   `ux_active_run_repo_base` constraint is rethrown as the active-run
 *   conflict error; any other failure after the rows were committed marks the
 *   run failed with reason `worktree_create_failed` and rethrows;
 * - while binding / advancing, an error for which
 *   `shouldPreserveHumanGateRun` resolves truthy is swallowed and the run id
 *   is still returned; otherwise the run is marked failed with reason
 *   `start_run_failed` and the error is rethrown.
 *
 * @param input - Requirements, repository location and optional overrides.
 * @returns The id of the created run.
 */
async startRun(input: RunStartInput): Promise<{ runId: string }> {
  const runId = input.runId ?? randomUUID();
  const templateName = input.templateName ?? "development";
  const templateVersion = input.templateVersion ?? 1;
  const repoPath = canonicalExistingPath(input.repoPath);
  const worktreeRoot = await this.resolveWorktreeRoot(runId, input.worktreeRoot);
  const templateRecord = await this.loadTemplate(templateName, templateVersion);
  const template = Template.parse(templateRecord.definition);
  const personaRecords = await this.loadPersonas();
  const personas = personaRecords.map((row) => Persona.parse(row.definition));
  const inputExtra = storeEngineMetadata(input.extra, input.scenarios);
  // Provisional hash: bindings are not known yet, so the list is empty here.
  // lockBindings() later overwrites run_inputs.inputHash with the bound value.
  const inputHash = hash({
    templateHash: templateRecord.hash,
    bindings: [],
    requirementsMd: input.requirementsMd,
    objective: input.objective ?? null,
    repoPath,
    baseBranch: input.baseBranch,
    extra: inputExtra,
  });

  // Tracks whether the run rows were committed, so the catch block knows
  // whether there is a run row to mark as failed.
  let runInserted = false;
  try {
    await this.db.transaction(async (tx) => {
      // Serialise start attempts, then check the preconditions under that lock.
      await this.lockStartAttempt(tx, repoPath, input.baseBranch);
      await this.assertRunCanStart(tx, repoPath, input.baseBranch);
      await tx.insert(runs).values({
        id: runId,
        templateId: templateRecord.id,
        templateHash: templateRecord.hash,
        state: "created",
        repoPath,
        baseBranch: input.baseBranch,
        worktreeRoot,
      });
      await tx.insert(runInputs).values({
        runId,
        requirementsMd: input.requirementsMd,
        objective: input.objective ?? null,
        extra: inputExtra,
        inputHash,
      });
      // One pending phase row per template phase; seq is 1-based template order.
      await tx.insert(runPhases).values(
        template.phases.map((phase, index) => ({
          runId,
          phaseKey: phase.key,
          seq: index + 1,
          state: "pending",
        })),
      );
      await new RunEventRepository(this.db).appendInTransaction(tx, {
        runId,
        type: "run.created",
        payload: { templateName, templateVersion },
        idempotencyKey: `run.created:${runId}`,
      });
    });
    runInserted = true;
    // The worktree is created only after the run rows are committed.
    const canonicalWorktreeRoot = await this.createGitWorktree(
      repoPath,
      input.baseBranch,
      runId,
      worktreeRoot,
    );
    // Persist the path reported by createGitWorktree when it differs from the planned one.
    if (canonicalWorktreeRoot !== worktreeRoot) {
      await this.db
        .update(runs)
        .set({ worktreeRoot: canonicalWorktreeRoot, updatedAt: new Date() })
        .where(eq(runs.id, runId));
    }
  } catch (error) {
    if (isPgConstraintViolation(error, "ux_active_run_repo_base")) {
      throw await this.activeRunConflict(repoPath, input.baseBranch);
    }
    if (runInserted) {
      await this.markRunFailedIfActive(runId, "worktree_create_failed");
    }
    throw error;
  }

  try {
    await this.lockBindings(
      runId,
      template,
      templateRecord.hash,
      personaRecords,
      personas,
      input,
    );
    await this.advanceRun(runId);
  } catch (error) {
    // The run id is still returned when the run is to be preserved for a human gate.
    if (await this.shouldPreserveHumanGateRun(runId, error)) {
      return { runId };
    }
    await this.markRunFailedIfActive(runId, "start_run_failed");
    throw error;
  }

  return { runId };
}
/**
 * Start-attempt preconditions, evaluated inside the start transaction.
 *
 * @param tx - Transaction the start attempt runs in.
 * @param repoPath - Canonical repository path of the new run.
 * @param baseBranch - Base branch of the new run.
 * @throws The `activeRunExists` error when `activeRunForRepoBase` finds a run
 *   for the same repository and base branch.
 * @throws DevflowError `max_concurrent_runs` when the number of runs whose
 *   state is not completed/failed/aborted has reached `maxConcurrentRuns`.
 */
private async assertRunCanStart(
  tx: TransactionDb,
  repoPath: string,
  baseBranch: string,
): Promise<void> {
  const [existing] = await activeRunForRepoBase(tx, repoPath, baseBranch);
  if (existing !== undefined) {
    throw activeRunExists(existing.id, existing.state);
  }

  // `::int` makes the driver hand back a JS number; the type hint lets the
  // comparison below type-check.
  const [count] = await tx
    .select({ value: sql<number>`count(*)::int` })
    .from(runs)
    .where(sql`${runs.state} NOT IN ('completed', 'failed', 'aborted')`);
  if ((count?.value ?? 0) >= this.maxConcurrentRuns) {
    throw new DevflowError("Maximum concurrent runs reached", {
      class: "human_required",
      code: "max_concurrent_runs",
      recoveryHint: `maxConcurrentRuns=${this.maxConcurrentRuns}`,
    });
  }
}

/**
 * Builds the conflict error reported when the database rejects a start
 * because of the active-run uniqueness constraint.
 *
 * The conflicting run is looked up outside the failed transaction; when it can
 * no longer be found, `"unknown"` is used for both id and state.
 *
 * NOTE(review): the return type argument assumes `activeRunExists` returns a
 * `DevflowError` - confirm against its declaration.
 */
private async activeRunConflict(repoPath: string, baseBranch: string): Promise<DevflowError> {
  const [existing] = await activeRunForRepoBase(this.db, repoPath, baseBranch);
  return activeRunExists(existing?.id ?? "unknown", existing?.state ?? "unknown");
}

/**
 * Records an approval decision and applies its follow-up.
 *
 * - `approve` / `request_changes`: the run is advanced. If advancing throws
 *   and `shouldPreserveHumanGateRun` resolves truthy the error is swallowed;
 *   otherwise the run is marked failed (`approval_advance_failed`) and the
 *   error is rethrown.
 * - `reject`: the final report is composed (best effort) with status `failed`.
 * - any other action: the final report is composed (best effort) with status
 *   `aborted`.
 *
 * A replayed decision (same request, action and client token as one already
 * stored) is acknowledged without repeating any follow-up: the original call
 * already triggered it, and advancing a second time could run concurrently
 * with the first and end up marking a healthy run as failed.
 *
 * @param runId - Run that owns the approval request.
 * @param approvalRequestId - Approval request being answered.
 * @param action - Decision; validated with `ApprovalDecisionAction.parse`.
 * @param clientToken - Caller-chosen token identifying this decision attempt.
 * @param comment - Optional comment stored with the decision.
 */
async signalApproval(
  runId: string,
  approvalRequestId: string,
  action: ApprovalDecisionActionValue,
  clientToken: string,
  comment?: string,
): Promise<void> {
  const parsedAction = ApprovalDecisionAction.parse(action);
  const decision = await this.recordApprovalDecision(
    runId,
    approvalRequestId,
    parsedAction,
    clientToken,
    comment,
  );
  if (decision.replayed) {
    return;
  }

  if (parsedAction === "approve" || parsedAction === "request_changes") {
    try {
      await this.advanceRun(runId);
    } catch (error) {
      if (await this.shouldPreserveHumanGateRun(runId, error)) {
        return;
      }
      await this.markRunFailedIfActive(runId, "approval_advance_failed");
      throw error;
    }
    return;
  }

  if (parsedAction === "reject") {
    await this.composeFinalReportBestEffort(runId, "failed");
    return;
  }

  await this.composeFinalReportBestEffort(runId, "aborted");
}

/**
 * Pauses a run that is planning, executing or awaiting approval.
 *
 * The previous state is stored in `pausedFromState` and a `run.paused` event
 * is appended in the same transaction. Unknown runs and runs in any other
 * state (including already paused and terminal runs) are left untouched.
 */
async pauseRun(runId: string): Promise<void> {
  const eventRepository = new RunEventRepository(this.db);
  const pausableStates = ["planning", "executing", "awaiting_approval"];
  await this.db.transaction(async (tx) => {
    const [run] = await lockRun(tx, runId);
    if (run === undefined || !pausableStates.includes(run.state)) {
      return;
    }

    // A fresh cause per signal keeps the event idempotency key unique.
    const cause = `signal:${randomUUID()}`;
    await tx
      .update(runs)
      .set({ state: "paused", pausedFromState: run.state, updatedAt: new Date() })
      .where(eq(runs.id, runId));
    await eventRepository.appendInTransaction(tx, {
      runId,
      type: "run.paused",
      payload: { cause, pausedFromState: run.state },
      idempotencyKey: `run.paused:${runId}:${cause}`,
    });
  });
}
/**
 * Aborts a run that has not reached a terminal state yet.
 *
 * Inside one transaction, with the run row locked: the run is set to
 * `aborted` (current phase and paused-from state cleared, `endedAt` stamped),
 * a `run.aborted` event carrying `reason` is appended, and the helpers for
 * failing active phases, aborting pending approvals and failing sessions are
 * applied. After the transaction commits, the returned sessions are disposed
 * and the final report is composed on a best-effort basis.
 *
 * Unknown runs and runs that are already terminal are left untouched.
 *
 * @param runId - Run to abort.
 * @param reason - Reason stored in the `run.aborted` event payload.
 */
async abortRun(runId: string, reason: string): Promise<void> {
  const eventRepository = new RunEventRepository(this.db);
  let aborted = false;
  let sessionsToDispose: string[] = [];
  await this.db.transaction(async (tx) => {
    const [run] = await lockRun(tx, runId);
    if (run === undefined || isTerminalRunState(run.state)) {
      return;
    }
    await tx
      .update(runs)
      .set({
        state: "aborted",
        currentPhaseId: null,
        pausedFromState: null,
        endedAt: new Date(),
        updatedAt: new Date(),
      })
      .where(eq(runs.id, runId));
    await eventRepository.appendInTransaction(tx, {
      runId,
      type: "run.aborted",
      payload: { reason },
      idempotencyKey: `run.aborted:${runId}`,
    });
    await failActivePhasesInTransaction(tx, eventRepository, runId, "abort");
    await abortPendingApprovalsInTransaction(tx, runId);
    sessionsToDispose = await markSessionsFailedInTransaction(tx, eventRepository, runId);
    aborted = true;
  });

  // Side effects outside the database only happen once the abort is committed.
  if (aborted) {
    await this.disposeSessions(sessionsToDispose);
    await this.composeFinalReportBestEffort(runId, "aborted");
  }
}

/**
 * Reads a status snapshot for a run.
 *
 * Phases are ordered by `seq`, approvals by creation time. `eventsTail` holds
 * the 20 most recent events in ascending `seq` order, with `id` and `seq`
 * rendered as strings.
 *
 * @throws The `runNotFound` error when no run has the given id.
 */
async getStatus(runId: string): Promise<RunStatus> {
  const [run] = await this.db
    .select({
      id: runs.id,
      state: runs.state,
      repoPath: runs.repoPath,
      baseBranch: runs.baseBranch,
      worktreeRoot: runs.worktreeRoot,
      currentPhaseId: runs.currentPhaseId,
      finalReportPath: runs.finalReportPath,
      startedAt: runs.startedAt,
      endedAt: runs.endedAt,
    })
    .from(runs)
    .where(eq(runs.id, runId))
    .limit(1);
  if (run === undefined) {
    throw runNotFound(runId);
  }

  const [phases, approvals, eventsTail] = await Promise.all([
    this.db
      .select({
        id: runPhases.id,
        phaseKey: runPhases.phaseKey,
        seq: runPhases.seq,
        state: runPhases.state,
        attempts: runPhases.attempts,
      })
      .from(runPhases)
      .where(eq(runPhases.runId, runId))
      .orderBy(asc(runPhases.seq)),
    this.db
      .select({
        id: approvalRequests.id,
        phaseId: approvalRequests.phaseId,
        gateKey: approvalRequests.gateKey,
        state: approvalRequests.state,
      })
      .from(approvalRequests)
      .where(eq(approvalRequests.runId, runId))
      .orderBy(asc(approvalRequests.createdAt)),
    // Newest first so LIMIT keeps the tail; flipped back to ascending below.
    this.db
      .select({
        id: runEvents.id,
        seq: runEvents.seq,
        type: runEvents.type,
        payload: runEvents.payload,
        ts: runEvents.ts,
      })
      .from(runEvents)
      .where(eq(runEvents.runId, runId))
      .orderBy(desc(runEvents.seq))
      .limit(20),
  ]);

  return {
    run,
    phases,
    approvals,
    eventsTail: eventsTail.reverse().map((event) => ({
      id: event.id.toString(),
      seq: event.seq.toString(),
      type: event.type,
      payload: event.payload,
      ts: event.ts,
    })),
  };
}
/**
 * Drives a run forward until it stops on its own.
 *
 * Each loop iteration re-reads the run and dispatches on its state:
 * - `bound`: promoted, then re-evaluated;
 * - `awaiting_approval` / `paused`: stop;
 * - terminal: compose the final report (best effort) if none is recorded, stop;
 * - anything other than `executing` / `planning`: `internal_state_corruption`.
 *
 * For an executable run, an already active phase is only re-executed when
 * `options.resumeActivePhase` is set and it is the run's current phase;
 * otherwise the loop stops. With no active phase the next pending phase is
 * executed. When none is pending, planned phase rows are materialised (and the
 * loop continues if any were added) or the run is completed.
 *
 * @param runId - Run to advance.
 * @param options - `resumeActivePhase` allows re-entering the active phase.
 * @throws The `runNotFound` error when the run does not exist.
 * @throws DevflowError `internal_state_corruption` for a non-executable state.
 */
private async advanceRun(
  runId: string,
  options: { resumeActivePhase?: boolean } = {},
): Promise<void> {
  while (true) {
    const context = await this.loadRunContext(runId);
    const [run] = await this.db
      .select({
        state: runs.state,
        currentPhaseId: runs.currentPhaseId,
        finalReportPath: runs.finalReportPath,
        worktreeRoot: runs.worktreeRoot,
      })
      .from(runs)
      .where(eq(runs.id, runId))
      .limit(1);
    if (run === undefined) {
      throw runNotFound(runId);
    }
    if (run.state === "bound") {
      await this.promoteBoundRun(runId);
      continue;
    }
    if (run.state === "awaiting_approval" || run.state === "paused") {
      return;
    }
    if (isTerminalRunState(run.state)) {
      if (run.finalReportPath === null) {
        await this.composeFinalReportBestEffort(runId, run.state);
      }
      return;
    }
    if (run.state !== "executing" && run.state !== "planning") {
      throw new DevflowError("Run is not executable", {
        class: "fatal",
        code: "internal_state_corruption",
        runId,
        recoveryHint: `run_state=${run.state}`,
      });
    }

    // Template phases first, followed by the phases loaded from the plan.
    const phaseDefinitions = [
      ...context.template.phases.map(toEnginePhaseDefinition),
      ...(await this.loadPlannedPhaseDefinitions(runId, run.worktreeRoot)),
    ];
    const activePhase = await this.activePhase(runId);
    if (activePhase !== undefined) {
      if (!options.resumeActivePhase || run.currentPhaseId !== activePhase.id) {
        return;
      }
      await this.executePhase(runId, run.worktreeRoot, context, activePhase, phaseDefinitions);
      continue;
    }
    const nextPhase = await this.nextPendingPhase(runId);
    if (nextPhase === undefined) {
      if (
        await this.ensurePlannedPhaseRows(
          runId,
          run.worktreeRoot,
          context.template.phases.map((phase) => phase.key),
        )
      ) {
        continue;
      }
      await this.completeRun(runId);
      return;
    }

    await this.executePhase(runId, run.worktreeRoot, context, nextPhase, phaseDefinitions);
  }
}
/**
 * Executes one phase row of a run.
 *
 * A phase without an expected artifact is skipped: it is set as current
 * phase, skipped, and cleared again. Otherwise the run is prepared for the
 * phase, the binding for the phase roles is resolved, and the phase is run
 * through `runSingleFakePhase` under `sessions.trackOperation`. Only the first
 * gate of the phase is passed on as workflow approval gate; when the phase has
 * no gate the current phase is cleared after it ran.
 *
 * @param runId - Run being executed.
 * @param worktreeRoot - Worktree the expected artifact path is resolved against.
 * @param context - Stored template and input of the run.
 * @param phaseRow - Phase row to execute.
 * @param phaseDefinitions - Definitions to look the phase key up in.
 * @throws DevflowError `internal_state_corruption` when no definition matches
 *   the phase key.
 */
private async executePhase(
  runId: string,
  worktreeRoot: string,
  context: StoredRunContext,
  phaseRow: { id: string; phaseKey: string },
  phaseDefinitions: readonly EnginePhaseDefinition[],
): Promise<void> {
  const phaseDefinition = phaseDefinitions.find((phase) => phase.key === phaseRow.phaseKey);
  if (phaseDefinition === undefined) {
    throw new DevflowError("Run phase is missing from template", {
      class: "fatal",
      code: "internal_state_corruption",
      runId,
      phaseId: phaseRow.id,
    });
  }
  if (phaseDefinition.expectedArtifact === undefined) {
    await this.setCurrentPhase(runId, phaseRow.id);
    await this.skipPhase(runId, phaseRow.id, phaseRow.phaseKey);
    await this.clearCurrentPhase(runId, phaseRow.id);
    return;
  }

  await this.prepareRunForPhase(runId, phaseRow.id, phaseDefinition.expectedArtifact.schema);
  const binding = await this.bindingForPhase(runId, phaseDefinition.roles);
  // NOTE(review): the artifact path is joined without a containment check here;
  // presumably it is validated where phase definitions are loaded - confirm.
  const expectedArtifactPath = resolve(worktreeRoot, phaseDefinition.expectedArtifact.path);
  // A per-phase timeout overrides the engine-wide wait timeout.
  const wait =
    phaseDefinition.timeoutMs === undefined
      ? this.wait
      : { ...this.wait, timeoutMs: phaseDefinition.timeoutMs };
  const workflowApprovalGateKey = phaseDefinition.gates[0];
  await this.sessions.trackOperation(
    runSingleFakePhase({
      db: this.db,
      sessions: this.sessions,
      runId,
      phaseId: phaseRow.id,
      phaseKey: phaseRow.phaseKey,
      roleId: binding.roleId,
      worktreeRoot,
      expectedArtifactPath,
      expectedSchema: phaseDefinition.expectedArtifact.schema,
      instructions: buildPhaseInstructions(
        phaseRow.phaseKey,
        phaseDefinition.title,
        context.input.requirementsMd,
        scenarioForPhase(context.input.extra, phaseRow.phaseKey),
      ),
      ...(wait === undefined ? {} : { wait }),
      terminalRun: false,
      ...(workflowApprovalGateKey === undefined
        ? {}
        : {
            workflowApprovalGateKey,
            workflowApprovalPayload: {
              phaseKey: phaseRow.phaseKey,
              title: phaseDefinition.title,
              expectedArtifactPath,
              expectedSchema: phaseDefinition.expectedArtifact.schema,
            },
          }),
    }),
  );
  if (workflowApprovalGateKey === undefined) {
    await this.clearCurrentPhase(runId, phaseRow.id);
  }
}

/**
 * Marks a run as completed and composes its final report (best effort).
 *
 * With the run row locked: unknown or already terminal runs are left
 * untouched; a run outside the phase-mutation states raises the
 * `runStateChanged` error; otherwise the run becomes `completed` and a
 * `run.completed` event is appended. The report is composed only after that
 * transaction committed.
 */
private async completeRun(runId: string): Promise<void> {
  const eventRepository = new RunEventRepository(this.db);
  let completed = false;
  await this.db.transaction(async (tx) => {
    const [run] = await lockRun(tx, runId);
    if (run === undefined || isTerminalRunState(run.state)) {
      return;
    }
    if (!isPhaseMutationRunState(run.state)) {
      throw runStateChanged(runId, undefined, run.state);
    }
    completed = true;
    await tx
      .update(runs)
      .set({
        state: "completed",
        currentPhaseId: null,
        pausedFromState: null,
        endedAt: new Date(),
        updatedAt: new Date(),
      })
      .where(eq(runs.id, runId));
    await eventRepository.appendInTransaction(tx, {
      runId,
      type: "run.completed",
      payload: {},
      idempotencyKey: `run.completed:${runId}`,
    });
  });
  if (!completed) {
    return;
  }
  await this.composeFinalReportBestEffort(runId, "completed");
}
"planning" : "executing"; + await this.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(tx, runId, phaseId); + await tx + .update(runs) + .set({ state, currentPhaseId: phaseId, updatedAt: new Date() }) + .where(and(eq(runs.id, runId), inArray(runs.state, ["executing", "planning"]))); + }); + } + + private async setCurrentPhase(runId: string, phaseId: string): Promise { + await this.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(tx, runId, phaseId); + await tx + .update(runs) + .set({ currentPhaseId: phaseId, updatedAt: new Date() }) + .where(and(eq(runs.id, runId), inArray(runs.state, ["executing", "planning"]))); + }); + } + + private async clearCurrentPhase(runId: string, phaseId: string): Promise { + await this.db + .update(runs) + .set({ currentPhaseId: null, updatedAt: new Date() }) + .where(and(eq(runs.id, runId), eq(runs.currentPhaseId, phaseId))); + } + + private async recordApprovalDecision( + runId: string, + approvalRequestId: string, + action: ApprovalDecisionActionValue, + clientToken: string, + comment: string | undefined, + ): Promise<{ replayed: boolean }> { + const decisionIdempotencyKey = `${approvalRequestId}:${action}:${clientToken}`; + const eventRepository = new RunEventRepository(this.db); + let sessionsToDispose: string[] = []; + const result = await this.db.transaction(async (tx) => { + const [run] = await lockRun(tx, runId); + if (run === undefined) { + throw runNotFound(runId); + } + await tx.execute( + sql`SELECT 1 FROM ${approvalRequests} WHERE ${approvalRequests.id} = ${approvalRequestId} FOR UPDATE`, + ); + const [request] = await tx + .select({ + id: approvalRequests.id, + runId: approvalRequests.runId, + phaseId: approvalRequests.phaseId, + state: approvalRequests.state, + }) + .from(approvalRequests) + .where(and(eq(approvalRequests.id, approvalRequestId), eq(approvalRequests.runId, runId))) + .limit(1); + if (request === undefined) { + throw new DevflowError("Approval request 
does not exist", { + class: "human_required", + code: "approval_not_found", + runId, + }); + } + + const existingDecision = await existingDecisionForToken(tx, approvalRequestId, clientToken); + if (existingDecision !== undefined) { + if (existingDecision.action !== action) { + throw approvalConflict(runId, "client token already used for a different action"); + } + return { replayed: true }; + } + if (isTerminalRunState(run.state)) { + throw approvalConflict(runId, `run_state=${run.state}`); + } + if (run.state !== "awaiting_approval" && run.state !== "paused") { + throw approvalConflict(runId, `run_state=${run.state}`); + } + if (run.state === "paused") { + const resolvesHumanRequiredGate = + (action === "reject" || action === "abort") && + (request.phaseId === null || + (await isHumanRequiredApprovalPhase(tx, runId, request.phaseId))); + if (!resolvesHumanRequiredGate) { + throw approvalConflict(runId, "paused runs must be resumed before approval decisions"); + } + } + if (request.state !== "pending") { + throw approvalConflict(runId, `approval_state=${request.state}`); + } + + await tx.insert(approvalDecisions).values({ + approvalRequestId, + action, + comment, + idempotencyKey: decisionIdempotencyKey, + }); + await tx + .update(approvalRequests) + .set({ state: approvalStateForAction(action), resolvedAt: new Date() }) + .where(eq(approvalRequests.id, approvalRequestId)); + await eventRepository.appendInTransaction(tx, { + runId, + ...(request.phaseId === null ? 
{} : { phaseId: request.phaseId }), + type: "approval.resolved", + payload: { approvalRequestId, action }, + idempotencyKey: `approval.resolved:${approvalRequestId}:${action}`, + }); + + if (action === "approve") { + if (request.phaseId !== null) { + await completeApprovedPhase(tx, eventRepository, runId, request.phaseId); + } + await tx + .update(runs) + .set({ + state: "executing", + currentPhaseId: null, + pausedFromState: null, + updatedAt: new Date(), + }) + .where(eq(runs.id, runId)); + await eventRepository.appendInTransaction(tx, { + runId, + type: "run.resumed", + payload: { cause: `approval:${approvalRequestId}:${action}` }, + idempotencyKey: `run.resumed:${runId}:approval:${approvalRequestId}:${action}`, + }); + return { replayed: false }; + } + + if (action === "request_changes") { + if (request.phaseId !== null) { + await resetPhaseForChanges(tx, eventRepository, runId, request.phaseId); + } + await tx + .update(runs) + .set({ + state: "planning", + currentPhaseId: request.phaseId, + pausedFromState: null, + updatedAt: new Date(), + }) + .where(eq(runs.id, runId)); + await eventRepository.appendInTransaction(tx, { + runId, + type: "run.resumed", + payload: { cause: `approval:${approvalRequestId}:${action}` }, + idempotencyKey: `run.resumed:${runId}:approval:${approvalRequestId}:${action}`, + }); + return { replayed: false }; + } + + const state: "aborted" | "failed" = action === "abort" ? 
"aborted" : "failed"; + await tx + .update(runs) + .set({ + state, + currentPhaseId: null, + pausedFromState: null, + endedAt: new Date(), + updatedAt: new Date(), + }) + .where(eq(runs.id, runId)); + if (request.phaseId !== null) { + await failApprovalPhase(tx, eventRepository, runId, request.phaseId, action); + } + if (action === "abort" || action === "reject") { + await abortPendingApprovalsInTransaction(tx, runId); + sessionsToDispose = await markSessionsFailedInTransaction(tx, eventRepository, runId); + } + await eventRepository.appendInTransaction(tx, { + runId, + type: action === "abort" ? "run.aborted" : "run.failed", + payload: { reason: `approval_${action}` }, + idempotencyKey: `${action === "abort" ? "run.aborted" : "run.failed"}:${runId}`, + }); + return { replayed: false }; + }); + + if (sessionsToDispose.length > 0) { + await this.disposeSessions(sessionsToDispose); + } + + return result; + } + + private async composeFinalReport( + runId: string, + status: "completed" | "failed" | "aborted", + ): Promise { + const [run] = await this.db + .select({ + id: runs.id, + templateHash: runs.templateHash, + worktreeRoot: runs.worktreeRoot, + finalReportPath: runs.finalReportPath, + endedAt: runs.endedAt, + }) + .from(runs) + .where(eq(runs.id, runId)) + .limit(1); + if (run === undefined) { + throw runNotFound(runId); + } + + const endedAt = (run.endedAt ?? 
/**
 * Assembles the in-memory final report object for a run.
 *
 * Reads the run's input row, bindings, phases, approval requests, review
 * findings, commands, artifacts and the 200 most recent events (returned in
 * ascending `seq` order), then shapes them into the report structure.
 * Approval requests still in state `pending` or `paused` are additionally
 * listed under `unresolved`.
 *
 * NOTE(review): the generic arguments of the return type were lost in the
 * extract this was reviewed from; `Record<string, unknown>` is assumed —
 * confirm against the real file.
 *
 * @param runId - Run to report on.
 * @param templateHash - Template hash copied verbatim into the report.
 * @param endedAt - ISO timestamp copied verbatim into the report.
 * @param status - Terminal status copied verbatim into the report.
 * @returns The report object (property order is significant for the JSON file).
 */
private async buildFinalReport(
  runId: string,
  templateHash: string,
  endedAt: string,
  status: "completed" | "failed" | "aborted",
): Promise<Record<string, unknown>> {
  const inputQuery = this.db.select().from(runInputs).where(eq(runInputs.runId, runId)).limit(1);
  const bindingsQuery = this.db
    .select({
      roleId: runBindings.roleId,
      personaHash: runBindings.personaHash,
      backend: runBindings.backend,
    })
    .from(runBindings)
    .where(eq(runBindings.runId, runId))
    .orderBy(asc(runBindings.roleId));
  const phasesQuery = this.db
    .select()
    .from(runPhases)
    .where(eq(runPhases.runId, runId))
    .orderBy(asc(runPhases.seq));
  const approvalsQuery = this.db
    .select()
    .from(approvalRequests)
    .where(eq(approvalRequests.runId, runId))
    .orderBy(asc(approvalRequests.createdAt));
  const findingsQuery = this.db
    .select()
    .from(reviewFindings)
    .where(eq(reviewFindings.runId, runId))
    .orderBy(asc(reviewFindings.createdAt));
  const commandsQuery = this.db
    .select({
      kind: commands.kind,
      argv: commands.argv,
      exitCode: commands.exitCode,
    })
    .from(commands)
    .where(eq(commands.runId, runId))
    .orderBy(asc(commands.createdAt));
  const artifactsQuery = this.db
    .select()
    .from(artifacts)
    .where(eq(artifacts.runId, runId))
    .orderBy(asc(artifacts.createdAt));
  // Newest-first with a cap of 200; flipped to chronological order below.
  const recentEventsQuery = this.db
    .select({
      id: runEvents.id,
      seq: runEvents.seq,
      type: runEvents.type,
      payload: runEvents.payload,
      ts: runEvents.ts,
    })
    .from(runEvents)
    .where(eq(runEvents.runId, runId))
    .orderBy(desc(runEvents.seq))
    .limit(200);

  const [
    inputRows,
    bindings,
    phases,
    approvals,
    findings,
    commandRows,
    artifactRows,
    newestFirstEvents,
  ] = await Promise.all([
    inputQuery,
    bindingsQuery,
    phasesQuery,
    approvalsQuery,
    findingsQuery,
    commandsQuery,
    artifactsQuery,
    recentEventsQuery,
  ]);

  const inputRow = inputRows[0];
  const inputs = serializeJson({
    requirementsMd: inputRow?.requirementsMd ?? "",
    objective: inputRow?.objective ?? null,
    extra: inputRow?.extra ?? null,
    inputHash: inputRow?.inputHash ?? "",
  });

  const unresolved = approvals.flatMap((approval) =>
    approval.state === "pending" || approval.state === "paused"
      ? [
          {
            type: "approval",
            approvalId: approval.id,
            gateKey: approval.gateKey,
            state: approval.state,
          },
        ]
      : [],
  );

  const commandSummaries = commandRows.map(({ kind, argv, exitCode }) => ({
    kind,
    argv,
    exit_code: exitCode,
  }));

  const tail = [...newestFirstEvents].reverse().map((event) => ({
    id: event.id.toString(),
    seq: event.seq.toString(),
    type: event.type,
    payload: event.payload,
    ts: event.ts.toISOString(),
  }));

  return {
    runId,
    templateHash,
    bindings,
    inputs,
    phases: serializeJson(phases),
    approvals: serializeJson(approvals),
    findings: serializeJson(findings),
    commands: commandSummaries,
    artifacts: serializeJson(artifactRows),
    events: { tail },
    unresolved,
    endedAt,
    status,
  };
}
"fatal", + code: "template_load_failed", + recoveryHint: `${name}@${version}`, + }); + } + + return template; + } + + private async loadPersonas(): Promise { + return this.db + .select({ + id: agentPersonas.id, + name: agentPersonas.name, + version: agentPersonas.version, + hash: agentPersonas.hash, + definition: agentPersonas.definition, + }) + .from(agentPersonas) + .orderBy(asc(agentPersonas.name), desc(agentPersonas.version)); + } + + private async loadRunContext(runId: string): Promise { + const [row] = await this.db + .select({ + templateDefinition: workflowTemplates.definition, + requirementsMd: runInputs.requirementsMd, + extra: runInputs.extra, + }) + .from(runs) + .innerJoin(workflowTemplates, eq(runs.templateId, workflowTemplates.id)) + .innerJoin(runInputs, eq(runInputs.runId, runs.id)) + .where(eq(runs.id, runId)) + .limit(1); + if (row === undefined) { + throw runNotFound(runId); + } + + return { + template: Template.parse(row.templateDefinition), + input: { + requirementsMd: row.requirementsMd, + extra: row.extra, + }, + }; + } + + private async nextPendingPhase( + runId: string, + ): Promise<{ id: string; phaseKey: string } | undefined> { + const [phase] = await this.db + .select({ id: runPhases.id, phaseKey: runPhases.phaseKey }) + .from(runPhases) + .where(and(eq(runPhases.runId, runId), eq(runPhases.state, "pending"))) + .orderBy(asc(runPhases.seq)) + .limit(1); + return phase; + } + + private async activePhase(runId: string): Promise<{ id: string; phaseKey: string } | undefined> { + const [phase] = await this.db + .select({ id: runPhases.id, phaseKey: runPhases.phaseKey }) + .from(runPhases) + .where( + and( + eq(runPhases.runId, runId), + inArray(runPhases.state, [ + "running", + "awaiting_artifact", + "validating", + "awaiting_approval", + ]), + ), + ) + .orderBy(asc(runPhases.seq)) + .limit(1); + return phase; + } + + private async bindingForPhase( + runId: string, + roleIds: readonly string[], + ): Promise<{ roleId: string }> { + const 
/**
 * Inserts `run_phases` rows for phases declared by the run's phase plan that
 * do not have a row yet.
 *
 * The planned phases are loaded and validated (unique keys that do not
 * collide with template keys, all roles bound) before a transaction checks
 * that the run may still be mutated and appends the missing phases as
 * `pending`, numbered after the highest existing `seq`.
 *
 * @param runId - Run whose phase rows are reconciled.
 * @param worktreeRoot - Forwarded to `loadPlannedPhaseDefinitions`.
 * @param templatePhaseKeys - Phase keys owned by the template.
 * @returns `true` when at least one row was inserted, otherwise `false`.
 */
private async ensurePlannedPhaseRows(
  runId: string,
  worktreeRoot: string,
  templatePhaseKeys: readonly string[],
): Promise<boolean> {
  const planned = await this.loadPlannedPhaseDefinitions(runId, worktreeRoot);
  if (planned.length === 0) {
    return false;
  }
  assertPlannedPhaseKeys(runId, planned, templatePhaseKeys);
  await this.assertPlannedPhaseBindings(runId, planned);

  return this.db.transaction(async (tx) => {
    await assertRunCanMutatePhaseInTransaction(tx, runId);

    const persisted = await tx
      .select({ phaseKey: runPhases.phaseKey, seq: runPhases.seq })
      .from(runPhases)
      .where(eq(runPhases.runId, runId));

    // Single pass: collect the keys already present and the highest seq in use.
    const persistedKeys = new Set<string>();
    let highestSeq = 0;
    for (const row of persisted) {
      persistedKeys.add(row.phaseKey);
      if (row.seq > highestSeq) {
        highestSeq = row.seq;
      }
    }

    const toInsert = planned.filter(({ key }) => !persistedKeys.has(key));
    if (toInsert.length === 0) {
      return false;
    }

    await tx.insert(runPhases).values(
      toInsert.map(({ key }, offset) => ({
        runId,
        phaseKey: key,
        seq: highestSeq + offset + 1,
        state: "pending",
      })),
    );
    return true;
  });
}
/**
 * Verifies that every given role has a binding on the run.
 *
 * A role counts as bound when some binding's role id equals it exactly or
 * starts with the role id followed by `#`.
 *
 * NOTE(review): `assertPlannedPhaseBindings` calls this once per planned
 * phase, so the same bindings query is repeated for each phase.
 *
 * @param runId - Run whose bindings are inspected.
 * @param roleIds - Roles that must all be bound.
 * @throws A fatal `internal_state_corruption` DevflowError whose
 *   `recoveryHint` is the comma-joined list of unbound roles.
 */
private async assertAllPhaseRolesBound(runId: string, roleIds: readonly string[]): Promise<void> {
  const bindingRows = await this.db
    .select({ roleId: runBindings.roleId })
    .from(runBindings)
    .where(eq(runBindings.runId, runId))
    .orderBy(asc(runBindings.roleId));

  const isBound = (wanted: string): boolean => {
    const variantPrefix = `${wanted}#`;
    return bindingRows.some(
      ({ roleId: bound }) => bound === wanted || bound.startsWith(variantPrefix),
    );
  };

  const unbound: string[] = [];
  for (const roleId of roleIds) {
    if (!isBound(roleId)) {
      unbound.push(roleId);
    }
  }
  if (unbound.length === 0) {
    return;
  }

  throw new DevflowError("Planned phase role is not bound", {
    class: "fatal",
    code: "internal_state_corruption",
    runId,
    recoveryHint: unbound.join(","),
  });
}
/**
 * Moves a run to `failed` unless it is missing or already terminal, then
 * makes sure a final report exists.
 *
 * Inside one transaction the run row is locked; for a non-terminal run the
 * active phases are failed, the run row is updated, a `run.failed` event is
 * appended and the run's sessions are marked failed. After the transaction
 * the affected sessions are disposed and a final report is composed
 * (best effort). A run that was already terminal only gets a report composed
 * when it has no `finalReportPath` yet.
 *
 * NOTE(review): unlike `sweepM4ProcessRestart`, this path does not call
 * `abortPendingApprovalsInTransaction` — confirm that is intentional.
 *
 * @param runId - Run to fail.
 * @param reason - Recorded in the phase and run failure event payloads.
 */
private async markRunFailedIfActive(runId: string, reason: string): Promise<void> {
  type ReportStatus = "completed" | "failed" | "aborted";
  interface Outcome {
    transitioned: boolean;
    reportStatus: ReportStatus | undefined;
    sessionIds: string[];
  }

  const eventRepository = new RunEventRepository(this.db);

  const outcome = await this.db.transaction(async (tx): Promise<Outcome> => {
    const [lockedRun] = await lockRun(tx, runId);
    if (lockedRun === undefined) {
      return { transitioned: false, reportStatus: undefined, sessionIds: [] };
    }
    if (isTerminalRunState(lockedRun.state)) {
      // Already finished: only backfill a report if none was ever written.
      return {
        transitioned: false,
        reportStatus: lockedRun.finalReportPath === null ? lockedRun.state : undefined,
        sessionIds: [],
      };
    }

    await failActivePhasesInTransaction(tx, eventRepository, runId, reason);
    await tx
      .update(runs)
      .set({
        state: "failed",
        currentPhaseId: null,
        pausedFromState: null,
        endedAt: new Date(),
        updatedAt: new Date(),
      })
      .where(eq(runs.id, runId));
    await eventRepository.appendInTransaction(tx, {
      runId,
      type: "run.failed",
      payload: { reason },
      idempotencyKey: `run.failed:${runId}`,
    });
    const sessionIds = await markSessionsFailedInTransaction(tx, eventRepository, runId);
    return { transitioned: true, reportStatus: "failed", sessionIds };
  });

  // Session disposal and report writing happen outside the transaction.
  if (outcome.transitioned) {
    await this.disposeSessions(outcome.sessionIds);
  }
  if (outcome.reportStatus !== undefined) {
    await this.composeFinalReportBestEffort(runId, outcome.reportStatus);
  }
}
"failed", "aborted"]), + sql`${runs.finalReportPath} IS NULL`, + ]; + if (options.runIds !== undefined) { + if (options.runIds.length === 0) { + return []; + } + conditions.push(inArray(runs.id, [...options.runIds])); + } + const terminalRuns = await this.db + .select({ id: runs.id, state: runs.state }) + .from(runs) + .where(and(...conditions)); + + const recoveredRunIds: string[] = []; + for (const run of terminalRuns) { + await this.composeFinalReportBestEffort( + run.id, + run.state as "completed" | "failed" | "aborted", + ); + const [updated] = await this.db + .select({ finalReportPath: runs.finalReportPath }) + .from(runs) + .where(eq(runs.id, run.id)) + .limit(1); + if (updated?.finalReportPath !== null && updated?.finalReportPath !== undefined) { + recoveredRunIds.push(run.id); + } + } + return recoveredRunIds; + } + + private async resolveWorktreeRoot( + runId: string, + requestedWorktreeRoot?: string, + ): Promise { + const runRoot = join(this.workspaceRoot, runId); + const worktreeRoot = requestedWorktreeRoot ?? 
/**
 * Creates the run's git worktree on a new branch `devflow/<runId>/main`.
 *
 * Runs `git -C <repoPath> worktree add -b <branch> <worktreeRoot> <baseBranch>`
 * with the environment produced by `gitChildEnv()`.
 *
 * NOTE(review): `baseBranch` and `worktreeRoot` are passed as positional
 * argv entries; presumably callers validate that they cannot start with `-`
 * — confirm.
 *
 * @param repoPath - Repository the worktree is added to.
 * @param baseBranch - Commit-ish the new branch starts from.
 * @param runId - Run id used in the branch name and error context.
 * @param worktreeRoot - Directory the worktree is created in.
 * @returns The real (symlink-resolved) path of the created worktree.
 * @throws A `human_required` / `workspace_permissions` DevflowError wrapping
 *   any failure of the git command or of resolving the resulting path.
 */
private async createGitWorktree(
  repoPath: string,
  baseBranch: string,
  runId: string,
  worktreeRoot: string,
): Promise<string> {
  const branchName = `devflow/${runId}/main`;
  const gitArgs = ["-C", repoPath, "worktree", "add", "-b", branchName, worktreeRoot, baseBranch];
  const execOptions = { env: gitChildEnv(), maxBuffer: 1024 * 1024 };

  let canonicalWorktreeRoot: string;
  try {
    await execFileAsync("git", gitArgs, execOptions);
    canonicalWorktreeRoot = realpathSync(worktreeRoot);
  } catch (cause) {
    throw new DevflowError("Failed to create git worktree", {
      class: "human_required",
      code: "workspace_permissions",
      runId,
      cause,
      recoveryHint: `git worktree add -b ${branchName} ${worktreeRoot} ${baseBranch}`,
    });
  }
  return canonicalWorktreeRoot;
}
/**
 * Looks up the oldest run for a repository/base-branch pair whose state is
 * not one of `completed`, `failed` or `aborted`.
 *
 * @param db - Database handle or open transaction to query through.
 * @param repoPath - Value matched against `runs.repoPath`.
 * @param baseBranch - Value matched against `runs.baseBranch`.
 * @returns At most one `{ id, state }` row, ordered by `createdAt` ascending.
 */
async function activeRunForRepoBase(
  db: Database | TransactionDb,
  repoPath: string,
  baseBranch: string,
) {
  const isNonTerminal = sql`${runs.state} NOT IN ('completed', 'failed', 'aborted')`;
  const matchesRepoAndBranch = and(
    eq(runs.repoPath, repoPath),
    eq(runs.baseBranch, baseBranch),
    isNonTerminal,
  );

  return db
    .select({ id: runs.id, state: runs.state })
    .from(runs)
    .where(matchesRepoAndBranch)
    .orderBy(asc(runs.createdAt))
    .limit(1);
}
/**
 * Tells whether a caught value is an object carrying a `constraint` property
 * equal to the given constraint name.
 *
 * @param error - Any caught value; non-objects and `null` never match.
 * @param constraint - Constraint name to compare against (strict equality).
 * @returns `true` only when `error.constraint === constraint`.
 */
function isPgConstraintViolation(error: unknown, constraint: string): boolean {
  if (error === null || typeof error !== "object") {
    return false;
  }
  if (!("constraint" in error)) {
    return false;
  }
  const reported: unknown = (error as { constraint?: unknown }).constraint;
  return reported === constraint;
}
/**
 * Marks every still-pending approval request of a run as `aborted` and
 * stamps `resolvedAt`, as part of the caller's transaction.
 *
 * Implemented as one set-based UPDATE guarded on `state = 'pending'`, so
 * requests that were already resolved are left untouched. (The previous
 * version loaded every approval row of the run, regardless of state, and
 * issued one UPDATE per row.)
 *
 * @param tx - Open transaction the update runs in.
 * @param runId - Run whose pending approval requests are aborted.
 */
async function abortPendingApprovalsInTransaction(tx: TransactionDb, runId: string) {
  await tx
    .update(approvalRequests)
    .set({ state: "aborted", resolvedAt: new Date() })
    .where(and(eq(approvalRequests.runId, runId), eq(approvalRequests.state, "pending")));
}
/**
 * Sets every TUI session of a run to `FAILED_NEEDS_HUMAN` and appends one
 * `session.failed` event per session, inside the caller's transaction.
 *
 * NOTE(review): sessions are selected by run id only, so sessions in any
 * state are overwritten — confirm that no session state needs to be
 * preserved here.
 *
 * @param tx - Open transaction.
 * @param eventRepository - Repository used to append the events.
 * @param runId - Run whose sessions are failed.
 * @returns Ids of the affected sessions (empty when the run has none).
 */
async function markSessionsFailedInTransaction(
  tx: TransactionDb,
  eventRepository: RunEventRepository,
  runId: string,
): Promise<string[]> {
  const runSessions = await tx
    .select({ id: tuiSessions.id, roleId: tuiSessions.roleId })
    .from(tuiSessions)
    .where(eq(tuiSessions.runId, runId));
  if (runSessions.length === 0) {
    return [];
  }

  await tx
    .update(tuiSessions)
    .set({ state: "FAILED_NEEDS_HUMAN" })
    .where(eq(tuiSessions.runId, runId));

  const failedSessionIds: string[] = [];
  for (const { id: sessionId, roleId } of runSessions) {
    // The idempotency key is per session, without an attempt component.
    await eventRepository.appendInTransaction(tx, {
      runId,
      type: "session.failed",
      payload: { sessionId, roleId },
      idempotencyKey: `session.failed:${sessionId}`,
    });
    failedSessionIds.push(sessionId);
  }
  return failedSessionIds;
}
/**
 * Puts a phase back to `pending` after an approval requested changes.
 *
 * Clears the phase's `startedAt`/`endedAt` and then releases the run's
 * sessions that were waiting for approval.
 *
 * @param tx - Open transaction.
 * @param eventRepository - Forwarded to `releaseWaitingApprovalSessions`.
 * @param runId - Run that owns the phase.
 * @param phaseId - Phase to reset.
 * @throws A fatal `internal_state_corruption` DevflowError when the phase
 *   does not exist on this run.
 */
async function resetPhaseForChanges(
  tx: TransactionDb,
  eventRepository: RunEventRepository,
  runId: string,
  phaseId: string,
) {
  const phaseBelongsToRun = and(eq(runPhases.id, phaseId), eq(runPhases.runId, runId));
  const matches = await tx
    .select({ id: runPhases.id })
    .from(runPhases)
    .where(phaseBelongsToRun)
    .limit(1);
  if (matches.length === 0) {
    throw new DevflowError("Approval phase does not exist", {
      class: "fatal",
      code: "internal_state_corruption",
      runId,
      phaseId,
    });
  }

  const pristine = { state: "pending", startedAt: null, endedAt: null } as const;
  await tx.update(runPhases).set(pristine).where(eq(runPhases.id, phaseId));
  await releaseWaitingApprovalSessions(tx, eventRepository, runId, phaseId);
}
/** Approval-request state recorded for each decision action. */
const approvalStateByAction = {
  approve: "approved",
  reject: "rejected",
  request_changes: "changes_requested",
  abort: "aborted",
} as const;

/**
 * Maps a decision action to the state stored on the approval request.
 *
 * @param action - Decision action taken on the request.
 * @returns `approved`, `rejected`, `changes_requested` or `aborted`.
 */
function approvalStateForAction(action: ApprovalDecisionActionValue) {
  return approvalStateByAction[action];
}
/**
 * Rejects a phase plan whose keys are not unique or that reuses a key
 * already owned by the template.
 *
 * Phases are checked in order; for each phase the duplicate check runs
 * before the template-collision check, and the first violation throws.
 *
 * @param runId - Run id attached to the thrown error.
 * @param plannedPhases - Phases declared by the phase plan.
 * @param templatePhaseKeys - Keys reserved by the workflow template.
 * @throws A fatal `internal_state_corruption` DevflowError whose
 *   `recoveryHint` is the offending phase key.
 */
function assertPlannedPhaseKeys(
  runId: string,
  plannedPhases: readonly EnginePhaseDefinition[],
  templatePhaseKeys: readonly string[],
) {
  const reservedByTemplate = new Set<string>(templatePhaseKeys);
  const claimedByPlan = new Set<string>();

  const reject = (message: string, phaseKey: string): never => {
    throw new DevflowError(message, {
      class: "fatal",
      code: "internal_state_corruption",
      runId,
      recoveryHint: phaseKey,
    });
  };

  plannedPhases.forEach(({ key }) => {
    if (claimedByPlan.has(key)) {
      reject("Phase plan contains duplicate phase keys", key);
    }
    claimedByPlan.add(key);
    if (reservedByTemplate.has(key)) {
      reject("Phase plan phase key collides with template phase key", key);
    }
  });
}
/**
 * Produces the `extra` object stored with a run's input: a shallow copy of
 * the caller's `extra` with the `devflowM4` key set to the scenario map.
 *
 * Any `devflowM4` entry already present in `extra` is replaced. The input
 * object is not modified.
 *
 * NOTE(review): the generic arguments of the parameter and return types
 * were lost in the extract this was reviewed from; `Record<string, unknown>`
 * is assumed — confirm against the real file.
 *
 * @param extra - Caller-supplied extra data, if any.
 * @param scenarios - Per-phase scenarios; an empty object is stored when absent.
 * @returns A new object containing `extra`'s entries plus `devflowM4.scenarios`.
 */
function storeEngineMetadata(
  extra: Record<string, unknown> | undefined,
  scenarios: Record<string, unknown> | undefined,
): Record<string, unknown> {
  const stored: Record<string, unknown> = { ...extra };
  stored.devflowM4 = { scenarios: scenarios ?? {} };
  return stored;
}
"ok", + }; +} + +interface FakePhaseScenarioObject { + scenario?: string; + repairScenario?: string; +} + +function readScenario(extra: unknown, phaseKey: string): FakePhaseScenario | undefined { + if (extra === null || typeof extra !== "object" || !("devflowM4" in extra)) { + return undefined; + } + const metadata = (extra as { devflowM4?: unknown }).devflowM4; + if (metadata === null || typeof metadata !== "object" || !("scenarios" in metadata)) { + return undefined; + } + const scenarios = (metadata as { scenarios?: unknown }).scenarios; + if (scenarios === null || typeof scenarios !== "object" || !(phaseKey in scenarios)) { + return undefined; + } + const value = (scenarios as Record)[phaseKey]; + if (typeof value === "string") { + return value; + } + if (value !== null && typeof value === "object") { + const candidate = value as Record; + const scenario = typeof candidate.scenario === "string" ? candidate.scenario : undefined; + const repairScenario = + typeof candidate.repairScenario === "string" ? candidate.repairScenario : undefined; + return { + ...(scenario === undefined ? {} : { scenario }), + ...(repairScenario === undefined ? 
{} : { repairScenario }), + }; + } + + return undefined; +} + +function buildPhaseInstructions( + phaseKey: string, + title: string, + requirementsMd: string, + scenario: Required, +): string { + return [ + `Scenario: ${scenario.scenario}`, + `Repair-Scenario: ${scenario.repairScenario}`, + `Phase: ${phaseKey}`, + `Title: ${title}`, + "Requirements:", + requirementsMd, + ].join("\n"); +} + +function canonicalExistingPath(path: string): string { + return realpathSync(resolve(path)); +} + +function gitChildEnv(): NodeJS.ProcessEnv { + const env: NodeJS.ProcessEnv = { ...process.env }; + for (const key of gitLocalEnvKeys) { + delete env[key]; + } + return env; +} + +const gitLocalEnvKeys = [ + "GIT_ALTERNATE_OBJECT_DIRECTORIES", + "GIT_CONFIG", + "GIT_CONFIG_PARAMETERS", + "GIT_CONFIG_COUNT", + "GIT_OBJECT_DIRECTORY", + "GIT_DIR", + "GIT_WORK_TREE", + "GIT_IMPLICIT_WORK_TREE", + "GIT_GRAFT_FILE", + "GIT_INDEX_FILE", + "GIT_NO_REPLACE_OBJECTS", + "GIT_REPLACE_REF_BASE", + "GIT_PREFIX", + "GIT_SHALLOW_FILE", + "GIT_COMMON_DIR", +] as const; + +async function atomicWriteFile(path: string, content: string): Promise { + await mkdir(dirname(path), { recursive: true }); + const tempPath = `${path}.${process.pid}.${Date.now()}.tmp`; + await writeFile(tempPath, content, "utf8"); + await rename(tempPath, path); +} + +function sha256Hex(bytes: Buffer): string { + return createHash("sha256").update(bytes).digest("hex"); +} + +function renderMarkdownReport(report: Record): string { + const runId = typeof report.runId === "string" ? report.runId : "unknown"; + const status = typeof report.status === "string" ? report.status : "unknown"; + const endedAt = typeof report.endedAt === "string" ? 
report.endedAt : "unknown"; + return [`# Devflow Run ${runId}`, "", `Status: ${status}`, `Ended: ${endedAt}`, ""].join("\n"); +} + +function serializeJson(value: unknown): unknown { + if (typeof value === "bigint") { + return value.toString(); + } + if (value instanceof Date) { + return value.toISOString(); + } + if (Array.isArray(value)) { + return value.map((item) => serializeJson(item)); + } + if (value !== null && typeof value === "object") { + return Object.fromEntries( + Object.entries(value as Record).map(([key, child]) => [ + key, + serializeJson(child), + ]), + ); + } + + return value; +} + +function isTerminalRunState(state: string): state is (typeof terminalRunStates)[number] { + return terminalRunStates.some((terminalState) => terminalState === state); +} + +function isPathInsideOrEqual(path: string, parent: string): boolean { + const relativePath = relative(parent, path); + return relativePath === "" || (!relativePath.startsWith("..") && relativePath !== ".."); +} + +function approvalConflict(runId: string, reason: string): DevflowError { + return new DevflowError("Approval decision conflicts with the current request state", { + class: "human_required", + code: "approval_conflict", + runId, + recoveryHint: reason, + }); +} + +function runNotFound(runId: string): DevflowError { + return new DevflowError("Run does not exist", { + class: "human_required", + code: "run_not_found", + runId, + }); +} diff --git a/packages/run-engine/src/fake-phase-harness.test.ts b/packages/run-engine/src/fake-phase-harness.test.ts index 3086c91..6fca84e 100644 --- a/packages/run-engine/src/fake-phase-harness.test.ts +++ b/packages/run-engine/src/fake-phase-harness.test.ts @@ -136,6 +136,18 @@ class StartFailsFakeAdapter extends FakeSessionAdapter { } } +class ResumeFailsFakeAdapter extends FakeSessionAdapter { + resumeAttempts = 0; + + override async resume(_handle: SessionHandle): Promise { + this.resumeAttempts += 1; + throw new DevflowError("transient resume failure", { 
+ class: "recoverable", + code: "pane_briefly_unresponsive", + }); + } +} + class PromptWritesArtifactBeforeReturnFakeAdapter extends FakeSessionAdapter { override async sendPrompt( handle: SessionHandle, @@ -334,7 +346,7 @@ describe("runSingleFakePhase", () => { worktreeRoot, wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 100 }, }), - ).rejects.toMatchObject({ code: "internal_state_corruption" }); + ).rejects.toMatchObject({ code: "run_state_changed" }); const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); expect(run).toEqual({ state: runState }); @@ -578,6 +590,209 @@ describe("runSingleFakePhase", () => { expect(approvals).toEqual([]); }); + it("moves a successful workflow-gated phase from busy to waiting for approval without an idle event", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-workflow-gate-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + workflowApprovalGateKey: "spec_approved", + uuidFactory: () => "00000000-0000-4000-8000-000000000024", + }); + + const [run] = await db + .select({ currentPhaseId: runs.currentPhaseId, state: runs.state }) + .from(runs) + .where(eq(runs.id, runId)); + expect(run).toEqual({ currentPhaseId: phaseId, state: "awaiting_approval" }); + + const [phase] = await db + .select({ state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + 
expect(phase).toEqual({ state: "awaiting_approval" }); + + const [session] = await db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session).toEqual({ state: "WAITING_FOR_APPROVAL" }); + + const [approval] = await db + .select({ gateKey: approvalRequests.gateKey, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "spec_approved", state: "pending" }); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual([ + "phase.started", + "session.created", + "session.ready", + "session.busy", + "prompt.sent", + "artifact.expected", + "artifact.validated", + "approval.requested", + ]); + }); + + it("does not mark a timeout-repaired workflow-gated phase idle before approval", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-workflow-timeout-repair-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + workflowApprovalGateKey: "spec_approved", + uuidFactory: () => "00000000-0000-4000-8000-000000000036", + }); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) 
=> event.type)).toEqual([ + "phase.started", + "session.created", + "session.ready", + "session.busy", + "prompt.sent", + "artifact.expected", + "artifact.timeout", + "session.recovered", + "phase.started", + "session.busy", + "prompt.repaired", + "artifact.expected", + "artifact.validated", + "approval.requested", + ]); + }); + + it("does not mark a replayed valid artifact idle before requesting workflow approval", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "validating", 1); + await recordPhaseStarted(db, runId, phaseId); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-workflow-gate-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const instructions = "Scenario: ok\nWrite the development specification."; + const promptHash = hash({ + attempt: 1, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseKey: "implement", + roleId: "implementer", + runId, + }); + const sessionId = randomUUID(); + const artifactId = randomUUID(); + const artifactHash = hash({ replay: "valid-workflow-gate" }); + await db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastPromptHash: promptHash, + lastPromptAt: new Date(Date.now() - 1000), + state: "BUSY", + }); + await db.insert(artifacts).values({ + id: artifactId, + runId, + phaseId, + path: expectedArtifactPath, + schemaId: "dev/spec@1", + hash: artifactHash, + valid: true, + }); + await db.insert(runEvents).values({ + runId, + phaseId, + seq: 2n, + type: "artifact.validated", + payload: { + artifactId, + hash: artifactHash, + path: expectedArtifactPath, + schemaId: "dev/spec@1", + }, + idempotencyKey: `artifact.validated:${phaseId}:${expectedArtifactPath}:${artifactHash}`, + }); + + await runSingleFakePhase({ 
+ adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + workflowApprovalGateKey: "spec_approved", + }); + + const [session] = await db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session).toEqual({ state: "WAITING_FOR_APPROVAL" }); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual([ + "phase.started", + "artifact.validated", + "approval.requested", + ]); + }); + it("resumes a running phase when prompt delivery succeeded before prompt.sent was recorded", async () => { const { db, phaseId, runId } = await createRunAndPhase("executing", "running", 1); await recordPhaseStarted(db, runId, phaseId); @@ -662,6 +877,53 @@ describe("runSingleFakePhase", () => { ]); }); + it("requests a human gate when existing session resume exhausts retries", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-resume-fails-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + await db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + state: "READY", + }); + const adapter = new ResumeFailsFakeAdapter(); + + await expect( + runSingleFakePhase({ + adapter, + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nExisting session resume fails.", + phaseId, + 
phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000039", + }), + ).rejects.toMatchObject({ code: "prompt_send_exhausted" }); + + expect(adapter.resumeAttempts).toBe(3); + const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); + expect(run).toEqual({ state: "paused" }); + const [approval] = await db + .select({ gateKey: approvalRequests.gateKey, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "prompt_send_exhausted", state: "pending" }); + }); + it("resumes a running phase when the crash happened before session creation", async () => { const { db, phaseId, runId } = await createRunAndPhase("executing", "running", 1); await recordPhaseStarted(db, runId, phaseId); @@ -898,6 +1160,65 @@ describe("runSingleFakePhase", () => { expect(run?.state).toBe("executing"); }); + it("reuses an idle role session when a later running phase has not sent its prompt yet", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "running", 1); + await recordPhaseStarted(db, runId, phaseId); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-reuse-idle-session-")), + ); + tempRoots.push(worktreeRoot); + const previousArtifactPath = join(worktreeRoot, "artifacts", "previous-spec.json"); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "next-spec.json"); + const adapter = new FakeSessionAdapter({ writeDelayMs: 0 }); + const sessionHandle = await adapter.start({ + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath: previousArtifactPath, + expectedSchema: "dev/spec@1", + roleId: "implementer", + runId, + }); + await db.insert(tuiSessions).values({ + id: sessionHandle.sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: 
worktreeRoot, + expectedArtifactPath: previousArtifactPath, + expectedSchema: "dev/spec@1", + state: "READY", + }); + + await runSingleFakePhase({ + adapter, + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + const [session] = await db + .select({ + expectedArtifactPath: tuiSessions.expectedArtifactPath, + expectedSchema: tuiSessions.expectedSchema, + state: tuiSessions.state, + }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionHandle.sessionId)); + expect(session).toEqual({ + expectedArtifactPath, + expectedSchema: "dev/spec@1", + state: "READY", + }); + await expectRunCompleted(db, runId); + }); + it("replays an invalid validating artifact and uses the one repair attempt", async () => { const { db, phaseId, runId } = await createRunAndPhase("executing", "validating", 1); await recordPhaseStarted(db, runId, phaseId); @@ -1674,7 +1995,7 @@ describe("runSingleFakePhase", () => { worktreeRoot, wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, }), - ).rejects.toMatchObject({ code: "internal_state_corruption" }); + ).rejects.toMatchObject({ code: "run_state_changed" }); const events = await db .select({ type: runEvents.type }) diff --git a/packages/run-engine/src/fake-phase-harness.ts b/packages/run-engine/src/fake-phase-harness.ts index 0a3fece..da671ad 100644 --- a/packages/run-engine/src/fake-phase-harness.ts +++ b/packages/run-engine/src/fake-phase-harness.ts @@ -24,6 +24,8 @@ import { import { type SessionAdapter, type SessionHandle, + SessionManager, + type SessionRuntime, type TranscriptChunkSink, captureAndPersistTranscript, } from "@devflow/session"; @@ -39,9 +41,8 @@ interface ArtifactWaitOptions extends FakePhaseWaitOptions { ignoreInitialSignature?: string; } -export interface RunSingleFakePhaseInput { 
+interface RunSingleFakePhaseBaseInput { db: DbClient["db"]; - adapter: SessionAdapter; runId: string; phaseId: string; phaseKey: string; @@ -53,8 +54,18 @@ export interface RunSingleFakePhaseInput { wait?: FakePhaseWaitOptions; uuidFactory?: () => string; transcriptSink?: TranscriptChunkSink; + terminalRun?: boolean; + workflowApprovalGateKey?: string; + workflowApprovalPayload?: Record; } +export type RunSingleFakePhaseInput = RunSingleFakePhaseBaseInput & + ({ sessions: SessionRuntime; adapter?: never } | { adapter: SessionAdapter; sessions?: never }); + +type CanonicalRunSingleFakePhaseInput = RunSingleFakePhaseBaseInput & { + sessions: SessionRuntime; +}; + export interface RunSingleFakePhaseResult { sessionId: string; promptId: string; @@ -67,6 +78,8 @@ export interface RunSingleFakePhaseResult { type TransactionDb = Parameters[0]>[0]; const sendPromptRetryBudget = 2; +const terminalRunStates = ["completed", "failed", "aborted"] as const; +const phaseMutationRunStates = ["executing", "planning"] as const; interface PhaseEntry { attempt: number; @@ -81,7 +94,7 @@ interface PhaseEntry { function canonicalizeRunSingleFakePhaseInput( input: RunSingleFakePhaseInput, -): RunSingleFakePhaseInput { +): CanonicalRunSingleFakePhaseInput { const rawWorktreeRoot = resolve(input.worktreeRoot); const worktreeRoot = realpathSync(rawWorktreeRoot); const expectedArtifactPath = canonicalizePathAgainstWorktree( @@ -89,8 +102,12 @@ function canonicalizeRunSingleFakePhaseInput( rawWorktreeRoot, worktreeRoot, ); + const sessions = + "sessions" in input && input.sessions !== undefined + ? 
input.sessions + : new SessionManager({ db: input.db, adapter: input.adapter }); - return { ...input, expectedArtifactPath, worktreeRoot }; + return { ...input, expectedArtifactPath, sessions, worktreeRoot }; } function canonicalizePathAgainstWorktree( @@ -158,8 +175,12 @@ export async function runSingleFakePhase( if (phaseEntry.replayedOutcome !== undefined) { promptId = requirePhaseEntryPromptId(input, phaseEntry, "Replayed artifact entry"); promptDedupKeyForIdle = promptId; - await markSessionIdle(input, eventRepository, handle.sessionId, promptId); - initialPromptIdleRecorded = true; + const replayedWorkflowGate = + phaseEntry.replayedOutcome.validation.ok && input.workflowApprovalGateKey !== undefined; + if (!replayedWorkflowGate) { + await markSessionIdle(input, eventRepository, handle.sessionId, promptId); + initialPromptIdleRecorded = true; + } } else if (phaseEntry.continueArtifactWait) { promptId = requirePhaseEntryPromptId(input, phaseEntry, "Artifact wait replay"); promptDedupKeyForIdle = promptId; @@ -179,6 +200,10 @@ export async function runSingleFakePhase( ); promptId = promptSend.promptId; } catch (error) { + if (isRunStateChanged(error)) { + await captureTranscript(input, handle); + throw error; + } if (shouldCreateHumanGate(error)) { const gateError = toHumanRequiredRecoveryError(error); await failPhaseAndRequestGate( @@ -221,6 +246,10 @@ export async function runSingleFakePhase( promptSend?.artifactBaselineSignature, ); } catch (error) { + if (isRunStateChanged(error)) { + await captureTranscript(input, handle); + throw error; + } if (!isDevflowErrorWithCode(error, "artifact_timeout_exhausted")) { await failRunAndDisposeSession( input, @@ -237,6 +266,10 @@ export async function runSingleFakePhase( try { recovered = await recoverFromArtifactTimeout(input, eventRepository, handle.sessionId); } catch (recoveryError) { + if (isRunStateChanged(recoveryError)) { + await captureTranscript(input, handle); + throw recoveryError; + } if 
(shouldCreateHumanGate(recoveryError)) { const gateError = toArtifactTimeoutRecoveryGateError(recoveryError); await failPhaseAndRequestGate( @@ -335,6 +368,10 @@ export async function runSingleFakePhase( ); promptId = promptSend.promptId; } catch (repairError) { + if (isRunStateChanged(repairError)) { + await captureTranscript(input, handle); + throw repairError; + } if (!shouldCreateHumanGate(repairError)) { await failRunAndDisposeSession( input, @@ -374,6 +411,10 @@ export async function runSingleFakePhase( promptSend.artifactBaselineSignature, ); } catch (repairError) { + if (isRunStateChanged(repairError)) { + await captureTranscript(input, handle); + throw repairError; + } if (!isDevflowErrorWithCode(repairError, "artifact_timeout_exhausted")) { await failRunAndDisposeSession( input, @@ -399,12 +440,14 @@ export async function runSingleFakePhase( await captureTranscript(input, handle); throw repairError; } - await markSessionIdle( - input, - eventRepository, - handle.sessionId, - timeoutRepairEnvelope.dedupKey, - ); + if (!(outcome.validation.ok && input.workflowApprovalGateKey !== undefined)) { + await markSessionIdle( + input, + eventRepository, + handle.sessionId, + timeoutRepairEnvelope.dedupKey, + ); + } } } if (outcome === undefined) { @@ -415,7 +458,9 @@ export async function runSingleFakePhase( phaseId: input.phaseId, }); } - if (outcome.attempt === attempt && !initialPromptIdleRecorded) { + const successfulWorkflowGate = + outcome.validation.ok && input.workflowApprovalGateKey !== undefined; + if (outcome.attempt === attempt && !initialPromptIdleRecorded && !successfulWorkflowGate) { await markSessionIdle(input, eventRepository, handle.sessionId, promptDedupKeyForIdle); } @@ -474,6 +519,10 @@ export async function runSingleFakePhase( ); promptId = promptSend.promptId; } catch (error) { + if (isRunStateChanged(error)) { + await captureTranscript(input, handle); + throw error; + } if (!shouldCreateHumanGate(error)) { await failRunAndDisposeSession( 
input, @@ -512,6 +561,10 @@ export async function runSingleFakePhase( promptSend.artifactBaselineSignature, ); } catch (error) { + if (isRunStateChanged(error)) { + await captureTranscript(input, handle); + throw error; + } if (!isDevflowErrorWithCode(error, "artifact_timeout_exhausted")) { await failRunAndDisposeSession( input, @@ -537,11 +590,34 @@ export async function runSingleFakePhase( await captureTranscript(input, handle); throw error; } - await markSessionIdle(input, eventRepository, handle.sessionId, repairEnvelope.dedupKey); + if (!(outcome.validation.ok && input.workflowApprovalGateKey !== undefined)) { + await markSessionIdle(input, eventRepository, handle.sessionId, repairEnvelope.dedupKey); + } } if (outcome.validation.ok) { - await completePhaseAndRun(input, eventRepository, outcome.attempt, handle.sessionId); + if (input.workflowApprovalGateKey !== undefined) { + await requestWorkflowApproval( + input, + eventRepository, + outcome.attempt, + handle.sessionId, + input.workflowApprovalGateKey, + input.workflowApprovalPayload ?? { + artifactId: outcome.artifact.id, + expectedArtifactPath: input.expectedArtifactPath, + schemaId: input.expectedSchema, + }, + ); + } else { + await completePhaseAndRun( + input, + eventRepository, + outcome.attempt, + handle.sessionId, + input.terminalRun ?? 
true, + ); + } } else { await failPhaseAndRequestGate( input, @@ -577,7 +653,7 @@ export async function runSingleFakePhase( } async function enterInitialPhase( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, ): Promise { const attempt = await tryStartPhaseAndRecord(input, eventRepository, ["pending"]); @@ -634,11 +710,7 @@ async function enterInitialPhase( resumedPrompt: false, }; } - if ( - session.expectedArtifactPath === input.expectedArtifactPath && - session.expectedSchema === input.expectedSchema && - ["CREATED", "BOOTSTRAPPING", "READY"].includes(session.state) - ) { + if (["CREATED", "BOOTSTRAPPING", "READY"].includes(session.state)) { return { attempt: phase.attempts, continueArtifactWait: false, @@ -809,7 +881,10 @@ async function enterInitialPhase( throw cannotReplayPhase(input, phase.state); } -function cannotReplayPhase(input: RunSingleFakePhaseInput, phaseState: string): DevflowError { +function cannotReplayPhase( + input: CanonicalRunSingleFakePhaseInput, + phaseState: string, +): DevflowError { return new DevflowError("Cannot start a fake phase from the current phase state", { class: "fatal", code: "internal_state_corruption", @@ -820,7 +895,7 @@ function cannotReplayPhase(input: RunSingleFakePhaseInput, phaseState: string): } function requirePhaseEntryPromptId( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, phaseEntry: PhaseEntry, context: string, ): string { @@ -841,7 +916,7 @@ interface PhaseStartReplayMetadata { } async function phaseStartReplayMetadata( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, attempt: number, ): Promise { const [event] = await input.db @@ -875,7 +950,7 @@ interface PersistedArtifactReplay { } function persistedArtifactValidation( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, artifact: PersistedArtifactReplay, ): ReturnType { if (artifact.valid) { @@ -900,7 +975,7 @@ 
function persistedArtifactValidation( } async function startPhaseAndRecord( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, allowedCurrentStates: readonly string[], payload: Record = {}, @@ -938,7 +1013,7 @@ async function startPhaseAndRecord( } async function tryStartPhaseAndRecord( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, allowedCurrentStates: readonly string[], payload: Record = {}, @@ -946,14 +1021,23 @@ async function tryStartPhaseAndRecord( return input.db.transaction(async (tx) => { await tx.execute(sql`SELECT 1 FROM ${runs} WHERE ${runs.id} = ${input.runId} FOR UPDATE`); const [run] = await tx.select({ state: runs.state }).from(runs).where(eq(runs.id, input.runId)); - if (run === undefined || run.state !== "executing") { - throw new DevflowError("Cannot start a fake phase unless the run is executing", { + if (run === undefined) { + throw new DevflowError("Run does not exist", { class: "fatal", code: "internal_state_corruption", runId: input.runId, phaseId: input.phaseId, }); } + if (!phaseMutationRunStates.includes(run.state as (typeof phaseMutationRunStates)[number])) { + throw new DevflowError("Run left active state before fake phase start", { + class: "human_required", + code: "run_state_changed", + runId: input.runId, + phaseId: input.phaseId, + recoveryHint: `run_state=${run.state}`, + }); + } const [updatedPhase] = await tx .update(runPhases) @@ -1000,7 +1084,7 @@ async function tryStartPhaseAndRecord( } async function failPhaseAndRequestGate( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, attempt: number, reason: string, @@ -1011,6 +1095,23 @@ async function failPhaseAndRequestGate( ) { try { await input.db.transaction(async (tx) => { + await tx.execute(sql`SELECT 1 FROM ${runs} WHERE ${runs.id} = ${input.runId} FOR UPDATE`); + const [run] = await 
tx + .select({ state: runs.state }) + .from(runs) + .where(eq(runs.id, input.runId)); + if (run === undefined) { + throw new DevflowError("Run does not exist", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + phaseId: input.phaseId, + }); + } + if (isTerminalRunState(run.state)) { + return; + } + const request = await ensureHumanGateRequestInTransaction( input, tx, @@ -1031,23 +1132,11 @@ async function failPhaseAndRequestGate( idempotencyKey: `phase.failed:${input.phaseId}:${attempt}`, }); - const [run] = await tx - .select({ state: runs.state }) - .from(runs) - .where(eq(runs.id, input.runId)); - if (run === undefined) { - throw new DevflowError("Run does not exist", { - class: "fatal", - code: "internal_state_corruption", - runId: input.runId, - phaseId: input.phaseId, - }); - } if (run.state !== "paused") { const cause = `human_required:${gateKey}:${input.phaseId}:${attempt}`; await tx .update(runs) - .set({ state: "paused", pausedFromState: run.state }) + .set({ state: "paused", pausedFromState: run.state, updatedAt: new Date() }) .where(eq(runs.id, input.runId)); await eventRepository.appendInTransaction(tx, { runId: input.runId, @@ -1108,12 +1197,19 @@ async function failPhaseAndRequestGate( } async function failPhaseAndRun( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, attempt: number, reason: string, ) { + let sessionIdsToDispose: string[] = []; await input.db.transaction(async (tx) => { + await tx.execute(sql`SELECT 1 FROM ${runs} WHERE ${runs.id} = ${input.runId} FOR UPDATE`); + const [run] = await tx.select({ state: runs.state }).from(runs).where(eq(runs.id, input.runId)); + if (run === undefined || isTerminalRunState(run.state)) { + return; + } + await tx .update(runPhases) .set({ state: "failed", endedAt: new Date() }) @@ -1127,7 +1223,13 @@ async function failPhaseAndRun( }); await tx .update(runs) - .set({ state: "failed", endedAt: new Date() }) + 
.set({ + state: "failed", + currentPhaseId: null, + pausedFromState: null, + endedAt: new Date(), + updatedAt: new Date(), + }) .where(eq(runs.id, input.runId)); await eventRepository.appendInTransaction(tx, { runId: input.runId, @@ -1135,28 +1237,49 @@ async function failPhaseAndRun( payload: { reason }, idempotencyKey: `run.failed:${input.runId}`, }); + sessionIdsToDispose = await markAllSessionsFailedInTransaction( + tx, + eventRepository, + input.runId, + ); }); + await disposeSessionIds(input.sessions, sessionIdsToDispose); } async function failRunAndDisposeSession( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, attempt: number, reason: string, handle: { sessionId: string }, ) { await failPhaseAndRun(input, eventRepository, attempt, reason); - await input.adapter.dispose(handle); - await markSessionFailedNeedsHuman(input, eventRepository, handle.sessionId); + await input.sessions.dispose(handle).catch(() => undefined); } async function completePhaseAndRun( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, attempt: number, sessionId: string, + terminalRun = true, ) { await input.db.transaction(async (tx) => { + await tx.execute(sql`SELECT 1 FROM ${runs} WHERE ${runs.id} = ${input.runId} FOR UPDATE`); + const [run] = await tx.select({ state: runs.state }).from(runs).where(eq(runs.id, input.runId)); + if (run === undefined || isTerminalRunState(run.state)) { + return; + } + if (!phaseMutationRunStates.includes(run.state as (typeof phaseMutationRunStates)[number])) { + throw new DevflowError("Run left active state before fake phase completion", { + class: "human_required", + code: "run_state_changed", + runId: input.runId, + phaseId: input.phaseId, + recoveryHint: `run_state=${run.state}`, + }); + } + await tx .update(runPhases) .set({ state: "completed", endedAt: new Date() }) @@ -1174,6 +1297,7 @@ async function completePhaseAndRun( 
.from(tuiSessions) .where(eq(tuiSessions.id, sessionId)); const recoveryAttempts = session?.recoveryAttempts ?? 0; + await tx.update(tuiSessions).set({ state: "READY" }).where(eq(tuiSessions.id, sessionId)); await eventRepository.appendInTransaction(tx, { runId: input.runId, phaseId: input.phaseId, @@ -1182,9 +1306,19 @@ async function completePhaseAndRun( idempotencyKey: `session.ready:${sessionId}:${recoveryAttempts}`, }); + if (!terminalRun) { + return; + } + await tx .update(runs) - .set({ state: "completed", endedAt: new Date() }) + .set({ + state: "completed", + currentPhaseId: null, + pausedFromState: null, + endedAt: new Date(), + updatedAt: new Date(), + }) .where(eq(runs.id, input.runId)); await eventRepository.appendInTransaction(tx, { runId: input.runId, @@ -1195,15 +1329,123 @@ async function completePhaseAndRun( }); } +async function requestWorkflowApproval( + input: CanonicalRunSingleFakePhaseInput, + eventRepository: RunEventRepository, + attempt: number, + sessionId: string, + gateKey: string, + payload: Record, +) { + const approvalIdempotencyKey = `${input.runId}:${gateKey}:${input.phaseId}:${attempt}`; + await input.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(input, tx); + const request = await ensureApprovalRequestInTransaction( + input, + tx, + gateKey, + approvalIdempotencyKey, + payload, + ); + + await tx + .update(runPhases) + .set({ state: "awaiting_approval" }) + .where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId))); + await tx + .update(tuiSessions) + .set({ state: "WAITING_FOR_APPROVAL" }) + .where(eq(tuiSessions.id, sessionId)); + await tx + .update(runs) + .set({ state: "awaiting_approval", currentPhaseId: input.phaseId }) + .where(and(eq(runs.id, input.runId), inArray(runs.state, ["executing", "planning"]))); + + const [run] = await tx + .select({ state: runs.state }) + .from(runs) + .where(eq(runs.id, input.runId)) + .limit(1); + if (run?.state !== "awaiting_approval") { + 
throw new DevflowError("Cannot request workflow approval after run left executing state", { + class: "human_required", + code: "run_state_changed", + runId: input.runId, + phaseId: input.phaseId, + recoveryHint: `run_state=${run?.state ?? "missing"}`, + }); + } + + await appendHumanGateRequestedEventInTransaction(input, eventRepository, tx, request, gateKey); + }); +} + +async function ensureApprovalRequestInTransaction( + input: CanonicalRunSingleFakePhaseInput, + tx: TransactionDb, + gateKey: string, + idempotencyKey: string, + payload: Record, +): Promise { + const inserted = await tx + .insert(approvalRequests) + .values({ + runId: input.runId, + phaseId: input.phaseId, + gateKey, + state: "pending", + idempotencyKey, + payload, + }) + .onConflictDoNothing({ target: approvalRequests.idempotencyKey }) + .returning({ id: approvalRequests.id, idempotencyKey: approvalRequests.idempotencyKey }); + if (inserted[0] !== undefined) { + return inserted[0]; + } + + const [existing] = await tx + .select({ + id: approvalRequests.id, + idempotencyKey: approvalRequests.idempotencyKey, + payload: approvalRequests.payload, + state: approvalRequests.state, + }) + .from(approvalRequests) + .where(eq(approvalRequests.idempotencyKey, idempotencyKey)); + if (existing === undefined || existing.state !== "pending") { + throw new DevflowError("Approval request replay did not match pending request", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + phaseId: input.phaseId, + }); + } + if (canonicalize(existing.payload) !== canonicalize(payload)) { + throw new DevflowError("Approval request replay payload mismatch", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + phaseId: input.phaseId, + }); + } + + return { id: existing.id, idempotencyKey: existing.idempotencyKey }; +} + async function startSessionAndRecord( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, 
attempt: number, ): Promise { + const existingHandle = await resumeExistingSessionAndRecord(input, eventRepository, attempt); + if (existingHandle !== undefined) { + return existingHandle; + } + let handle: SessionHandle | undefined; let sessionRowPersisted = false; try { - handle = await input.adapter.start({ + handle = await input.sessions.start({ runId: input.runId, roleId: input.roleId, backend: "fake", @@ -1212,17 +1454,27 @@ async function startSessionAndRecord( expectedSchema: input.expectedSchema, }); const startedHandle = handle; + let sessionInsertConflicted = false; await input.db.transaction(async (tx) => { - await tx.insert(tuiSessions).values({ - id: startedHandle.sessionId, - runId: input.runId, - roleId: input.roleId, - backend: "fake", - cwd: input.worktreeRoot, - expectedArtifactPath: input.expectedArtifactPath, - expectedSchema: input.expectedSchema, - state: "CREATED", - }); + await assertRunCanMutatePhaseInTransaction(input, tx); + const insertedSession = await tx + .insert(tuiSessions) + .values({ + id: startedHandle.sessionId, + runId: input.runId, + roleId: input.roleId, + backend: "fake", + cwd: input.worktreeRoot, + expectedArtifactPath: input.expectedArtifactPath, + expectedSchema: input.expectedSchema, + state: "CREATED", + }) + .onConflictDoNothing({ target: [tuiSessions.runId, tuiSessions.roleId] }) + .returning({ id: tuiSessions.id }); + if (insertedSession[0] === undefined) { + sessionInsertConflicted = true; + return; + } await eventRepository.appendInTransaction(tx, { runId: input.runId, phaseId: input.phaseId, @@ -1246,13 +1498,30 @@ async function startSessionAndRecord( idempotencyKey: `session.ready:${startedHandle.sessionId}:0`, }); }); + if (sessionInsertConflicted) { + await input.sessions.dispose(startedHandle).catch(() => undefined); + handle = undefined; + const existingHandle = await resumeExistingSessionAndRecord(input, eventRepository, attempt); + if (existingHandle !== undefined) { + return existingHandle; + } + throw 
new DevflowError("Concurrent fake session insert conflicted without an existing row", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + phaseId: input.phaseId, + }); + } sessionRowPersisted = true; return startedHandle; } catch (error) { if (handle !== undefined) { - await input.adapter.dispose(handle); + await input.sessions.dispose(handle); } + if (isRunStateChanged(error)) { + throw error; + } if (shouldCreateHumanGate(error)) { const gateError = toHumanRequiredRecoveryError(error); await failPhaseAndRequestGate( @@ -1275,18 +1544,128 @@ async function startSessionAndRecord( } } +async function resumeExistingSessionAndRecord( + input: CanonicalRunSingleFakePhaseInput, + eventRepository: RunEventRepository, + attempt: number, +): Promise { + const [session] = await input.db + .select({ + id: tuiSessions.id, + recoveryAttempts: tuiSessions.recoveryAttempts, + state: tuiSessions.state, + }) + .from(tuiSessions) + .where(and(eq(tuiSessions.runId, input.runId), eq(tuiSessions.roleId, input.roleId))) + .limit(1); + if (session === undefined) { + return undefined; + } + if (session.state === "FAILED_NEEDS_HUMAN") { + throw new DevflowError("Cannot reuse a failed fake phase session", { + class: "human_required", + code: "session_failed_needs_human", + runId: input.runId, + phaseId: input.phaseId, + }); + } + + let handle: SessionHandle; + try { + handle = await resumeWithRetry(input.sessions, { sessionId: session.id }); + } catch (error) { + if (isRunStateChanged(error)) { + throw error; + } + if (shouldCreateHumanGate(error)) { + const gateError = toHumanRequiredRecoveryError(error); + await failPhaseAndRequestGate( + input, + eventRepository, + attempt, + "session_resume_failed", + gateError.code, + { errorCode: error.code, recoveryHint: gateError.recoveryHint }, + session.id, + { markSessionCrashed: true }, + ); + throw gateError; + } + + await failPhaseAndRun(input, eventRepository, attempt, "session_resume_failed"); + throw error; + } 
+ await input.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(input, tx); + await tx + .update(tuiSessions) + .set({ + cwd: input.worktreeRoot, + expectedArtifactPath: input.expectedArtifactPath, + expectedSchema: input.expectedSchema, + state: "READY", + }) + .where(eq(tuiSessions.id, session.id)); + await eventRepository.appendInTransaction(tx, { + runId: input.runId, + phaseId: input.phaseId, + type: "session.ready", + payload: { + sessionId: session.id, + roleId: input.roleId, + recoveryAttempts: session.recoveryAttempts, + }, + idempotencyKey: `session.ready:${session.id}:${session.recoveryAttempts}`, + }); + }); + + return handle; +} + async function setPhaseState( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, state: "running" | "awaiting_artifact" | "validating" | "completed" | "failed", ) { - await input.db - .update(runPhases) - .set({ state }) - .where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId))); + await input.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(input, tx); + await tx + .update(runPhases) + .set({ state }) + .where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId))); + }); +} + +function isTerminalRunState(state: string): boolean { + return terminalRunStates.includes(state as (typeof terminalRunStates)[number]); +} + +async function assertRunCanMutatePhaseInTransaction( + input: CanonicalRunSingleFakePhaseInput, + tx: TransactionDb, +) { + await tx.execute(sql`SELECT 1 FROM ${runs} WHERE ${runs.id} = ${input.runId} FOR UPDATE`); + const [run] = await tx.select({ state: runs.state }).from(runs).where(eq(runs.id, input.runId)); + if (run === undefined) { + throw new DevflowError("Run does not exist", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + phaseId: input.phaseId, + }); + } + if (!phaseMutationRunStates.includes(run.state as (typeof phaseMutationRunStates)[number])) { + 
throw new DevflowError("Run left active state before fake phase mutation", { + class: "human_required", + code: "run_state_changed", + runId: input.runId, + phaseId: input.phaseId, + recoveryHint: `run_state=${run.state}`, + }); + } } function buildEnvelope( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, attempt: number, instructions: string, ): PromptEnvelope { @@ -1323,54 +1702,63 @@ interface SendPromptAndRecordOptions { } async function sendPromptAndRecord( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, handle: { sessionId: string }, envelope: PromptEnvelope, type: "prompt.sent" | "prompt.repaired", options: SendPromptAndRecordOptions = {}, ): Promise { - await input.db - .update(tuiSessions) - .set({ - state: "BUSY", - lastPromptHash: envelope.dedupKey, - lastPromptAt: new Date(), - }) - .where(eq(tuiSessions.id, handle.sessionId)); - await eventRepository.append({ - runId: input.runId, - phaseId: input.phaseId, - type: "session.busy", - payload: { sessionId: handle.sessionId, roleId: input.roleId, dedupKey: envelope.dedupKey }, - idempotencyKey: `session.busy:${handle.sessionId}:${envelope.dedupKey}`, + await input.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(input, tx); + await tx + .update(tuiSessions) + .set({ + cwd: input.worktreeRoot, + expectedArtifactPath: input.expectedArtifactPath, + expectedSchema: input.expectedSchema, + state: "BUSY", + lastPromptHash: envelope.dedupKey, + lastPromptAt: new Date(), + }) + .where(eq(tuiSessions.id, handle.sessionId)); + await eventRepository.appendInTransaction(tx, { + runId: input.runId, + phaseId: input.phaseId, + type: "session.busy", + payload: { sessionId: handle.sessionId, roleId: input.roleId, dedupKey: envelope.dedupKey }, + idempotencyKey: `session.busy:${handle.sessionId}:${envelope.dedupKey}`, + }); }); const artifactBaselineSignature = options.captureArtifactBaseline === false 
? undefined : await artifactSignature(input.expectedArtifactPath); - const prompt = await sendPromptWithRetry(input.adapter, handle, envelope); - await eventRepository.append({ - runId: input.runId, - phaseId: input.phaseId, - type, - payload: { roleId: input.roleId, dedupKey: envelope.dedupKey }, - idempotencyKey: `${type}:${envelope.dedupKey}`, + const prompt = await sendPromptWithRetry(input.sessions, handle, envelope); + await input.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(input, tx); + await eventRepository.appendInTransaction(tx, { + runId: input.runId, + phaseId: input.phaseId, + type, + payload: { roleId: input.roleId, dedupKey: envelope.dedupKey }, + idempotencyKey: `${type}:${envelope.dedupKey}`, + }); }); return { promptId: prompt.promptId, artifactBaselineSignature }; } async function sendPromptWithRetry( - adapter: SessionAdapter, + sessions: SessionRuntime, handle: { sessionId: string }, envelope: PromptEnvelope, ): Promise<{ promptId: string }> { let lastError: unknown; for (let physicalAttempt = 0; physicalAttempt <= sendPromptRetryBudget; physicalAttempt += 1) { try { - return await adapter.sendPrompt(handle, envelope); + return await sessions.sendPrompt(handle, envelope); } catch (error) { lastError = error; if (!(error instanceof DevflowError) || error.class !== "recoverable") { @@ -1398,25 +1786,13 @@ interface ArtifactRecord { } async function waitForAndValidateArtifact( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, attempt: number, sessionId: string, artifactBaselineSignature: string | undefined, ): Promise { - await eventRepository.append({ - runId: input.runId, - phaseId: input.phaseId, - type: "artifact.expected", - payload: { - path: input.expectedArtifactPath, - schemaId: input.expectedSchema, - attempt, - }, - idempotencyKey: `artifact.expected:${input.phaseId}:${attempt}:${input.expectedArtifactPath}`, - }); - - await 
setPhaseState(input, "awaiting_artifact"); + await startArtifactWait(input, eventRepository, attempt); try { const waitOptions: ArtifactWaitOptions = { ...input.wait }; if (artifactBaselineSignature !== undefined) { @@ -1427,7 +1803,77 @@ async function waitForAndValidateArtifact( if (!isDevflowErrorWithCode(error, "artifact_timeout_exhausted")) { throw error; } - await eventRepository.append({ + await recordArtifactTimeout(input, eventRepository, attempt, sessionId); + throw error; + } + + await setPhaseState(input, "validating"); + return validateCurrentArtifact(input, eventRepository, attempt); +} + +async function validateCurrentArtifact( + input: CanonicalRunSingleFakePhaseInput, + eventRepository: RunEventRepository, + attempt: number, +): Promise { + const artifactBytes = await readArtifactBytes(input); + const artifactHash = createHash("sha256").update(artifactBytes).digest("hex"); + const parsedArtifact = parseArtifactJson(artifactBytes); + const validation = validateArtifact(input.expectedSchema, parsedArtifact); + + const artifact = await recordArtifactValidation( + input, + eventRepository, + attempt, + artifactHash, + validation, + ); + if (artifact === undefined) { + throw new DevflowError("Artifact insert returned no row", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + phaseId: input.phaseId, + }); + } + + return { attempt, artifact, artifactHash, validation }; +} + +async function startArtifactWait( + input: CanonicalRunSingleFakePhaseInput, + eventRepository: RunEventRepository, + attempt: number, +) { + await input.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(input, tx); + await tx + .update(runPhases) + .set({ state: "awaiting_artifact" }) + .where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId))); + await eventRepository.appendInTransaction(tx, { + runId: input.runId, + phaseId: input.phaseId, + type: "artifact.expected", + payload: { + path: 
input.expectedArtifactPath, + schemaId: input.expectedSchema, + attempt, + }, + idempotencyKey: `artifact.expected:${input.phaseId}:${attempt}:${input.expectedArtifactPath}`, + }); + }); +} + +async function recordArtifactTimeout( + input: CanonicalRunSingleFakePhaseInput, + eventRepository: RunEventRepository, + attempt: number, + sessionId: string, +) { + await input.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(input, tx); + await eventRepository.appendInTransaction(tx, { runId: input.runId, phaseId: input.phaseId, type: "artifact.timeout", @@ -1438,68 +1884,58 @@ async function waitForAndValidateArtifact( }, idempotencyKey: `artifact.timeout:${input.phaseId}:${attempt}:${input.expectedArtifactPath}`, }); - await input.db + await tx .update(tuiSessions) .set({ state: "ARTIFACT_TIMEOUT" }) .where(eq(tuiSessions.id, sessionId)); - throw error; - } - - await setPhaseState(input, "validating"); - return validateCurrentArtifact(input, eventRepository, attempt); + }); } -async function validateCurrentArtifact( - input: RunSingleFakePhaseInput, +async function recordArtifactValidation( + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, attempt: number, -): Promise { - const artifactBytes = await readArtifactBytes(input); - const artifactHash = createHash("sha256").update(artifactBytes).digest("hex"); - const parsedArtifact = parseArtifactJson(artifactBytes); - const validation = validateArtifact(input.expectedSchema, parsedArtifact); - - const artifact = await insertArtifactRecord(input, artifactHash, validation); - if (artifact === undefined) { - throw new DevflowError("Artifact insert returned no row", { - class: "fatal", - code: "internal_state_corruption", + artifactHash: string, + validation: ReturnType, +): Promise<{ id: string } | undefined> { + return input.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(input, tx); + const artifact = await 
insertArtifactRecordInTransaction(tx, input, artifactHash, validation); + if (artifact === undefined) { + return undefined; + } + await eventRepository.appendInTransaction(tx, { runId: input.runId, phaseId: input.phaseId, + type: validation.ok ? "artifact.validated" : "artifact.invalid", + payload: validation.ok + ? { + artifactId: artifact.id, + hash: artifactHash, + path: input.expectedArtifactPath, + schemaId: input.expectedSchema, + } + : { + artifactId: artifact.id, + hash: artifactHash, + path: input.expectedArtifactPath, + schemaId: input.expectedSchema, + errors: validation.errors, + }, + idempotencyKey: `${validation.ok ? "artifact.validated" : "artifact.invalid"}:${input.phaseId}:${input.expectedArtifactPath}:${artifactHash}`, }); - } - - await eventRepository.append({ - runId: input.runId, - phaseId: input.phaseId, - type: validation.ok ? "artifact.validated" : "artifact.invalid", - payload: validation.ok - ? { - artifactId: artifact.id, - hash: artifactHash, - path: input.expectedArtifactPath, - schemaId: input.expectedSchema, - } - : { - artifactId: artifact.id, - hash: artifactHash, - path: input.expectedArtifactPath, - schemaId: input.expectedSchema, - errors: validation.errors, - }, - idempotencyKey: `${validation.ok ? "artifact.validated" : "artifact.invalid"}:${input.phaseId}:${input.expectedArtifactPath}:${artifactHash}`, + return artifact; }); - - return { attempt, artifact, artifactHash, validation }; } -async function insertArtifactRecord( - input: RunSingleFakePhaseInput, +async function insertArtifactRecordInTransaction( + tx: TransactionDb, + input: CanonicalRunSingleFakePhaseInput, artifactHash: string, validation: ReturnType, ): Promise<{ id: string } | undefined> { const validationError = validation.ok ? 
null : { errors: validation.errors }; - const inserted = await input.db + const inserted = await tx .insert(artifacts) .values({ runId: input.runId, @@ -1522,7 +1958,7 @@ async function insertArtifactRecord( const artifact = inserted[0] ?? ( - await input.db + await tx .select({ id: artifacts.id, phaseId: artifacts.phaseId, @@ -1549,7 +1985,7 @@ async function insertArtifactRecord( } function assertArtifactReplayMatches( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, valid: boolean, validationError: unknown, artifact: ArtifactRecord, @@ -1572,23 +2008,26 @@ function assertArtifactReplayMatches( } async function markSessionIdle( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, sessionId: string, promptDedupKey: string, ) { - await input.db.update(tuiSessions).set({ state: "READY" }).where(eq(tuiSessions.id, sessionId)); - await eventRepository.append({ - runId: input.runId, - phaseId: input.phaseId, - type: "session.idle", - payload: { sessionId, roleId: input.roleId, dedupKey: promptDedupKey }, - idempotencyKey: `session.idle:${sessionId}:${promptDedupKey}`, + await input.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(input, tx); + await tx.update(tuiSessions).set({ state: "READY" }).where(eq(tuiSessions.id, sessionId)); + await eventRepository.appendInTransaction(tx, { + runId: input.runId, + phaseId: input.phaseId, + type: "session.idle", + payload: { sessionId, roleId: input.roleId, dedupKey: promptDedupKey }, + idempotencyKey: `session.idle:${sessionId}:${promptDedupKey}`, + }); }); } async function markSessionReady( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, sessionId: string, ) { @@ -1607,53 +2046,61 @@ async function markSessionReady( } async function recoverFromArtifactTimeout( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, 
eventRepository: RunEventRepository, sessionId: string, ): Promise { - const probe = await probeWithTypedError(input.adapter, { sessionId }); + const probe = await probeWithTypedError(input.sessions, { sessionId }); if (!probe.alive || !probe.paneActive) { return false; } - await input.db - .update(tuiSessions) - .set({ state: "RESUMING" }) - .where(eq(tuiSessions.id, sessionId)); + await setSessionStateIfRunActive(input, sessionId, "RESUMING"); - const rebootstrapOk = await rebootstrapWithRetry(input.adapter, { sessionId }); + const rebootstrapOk = await rebootstrapWithRetry(input.sessions, { sessionId }); if (!rebootstrapOk) { return false; } - await input.db - .update(tuiSessions) - .set({ state: "REBOOTSTRAPPED" }) - .where(eq(tuiSessions.id, sessionId)); + await setSessionStateIfRunActive(input, sessionId, "REBOOTSTRAPPED"); - const [session] = await input.db - .select({ recoveryAttempts: tuiSessions.recoveryAttempts }) - .from(tuiSessions) - .where(eq(tuiSessions.id, sessionId)); - const recoveryAttempts = (session?.recoveryAttempts ?? 0) + 1; - await input.db - .update(tuiSessions) - .set({ state: "READY", recoveryAttempts }) - .where(eq(tuiSessions.id, sessionId)); - await eventRepository.append({ - runId: input.runId, - phaseId: input.phaseId, - type: "session.recovered", - payload: { sessionId, roleId: input.roleId, recoveryAttempts }, - idempotencyKey: `session.recovered:${sessionId}:${recoveryAttempts}`, + await input.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(input, tx); + const [session] = await tx + .select({ recoveryAttempts: tuiSessions.recoveryAttempts }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + const recoveryAttempts = (session?.recoveryAttempts ?? 
0) + 1; + await tx + .update(tuiSessions) + .set({ state: "READY", recoveryAttempts }) + .where(eq(tuiSessions.id, sessionId)); + await eventRepository.appendInTransaction(tx, { + runId: input.runId, + phaseId: input.phaseId, + type: "session.recovered", + payload: { sessionId, roleId: input.roleId, recoveryAttempts }, + idempotencyKey: `session.recovered:${sessionId}:${recoveryAttempts}`, + }); }); return true; } +async function setSessionStateIfRunActive( + input: CanonicalRunSingleFakePhaseInput, + sessionId: string, + state: "RESUMING" | "REBOOTSTRAPPED", +) { + await input.db.transaction(async (tx) => { + await assertRunCanMutatePhaseInTransaction(input, tx); + await tx.update(tuiSessions).set({ state }).where(eq(tuiSessions.id, sessionId)); + }); +} + async function probeWithTypedError( - adapter: SessionAdapter, + sessions: SessionRuntime, handle: { sessionId: string }, -): ReturnType { +): ReturnType { try { - return await adapter.probe(handle); + return await sessions.probe(handle); } catch (error) { if (error instanceof DevflowError) { throw error; @@ -1667,12 +2114,12 @@ async function probeWithTypedError( } async function rebootstrapWithRetry( - adapter: SessionAdapter, + sessions: SessionRuntime, handle: { sessionId: string }, ): Promise { for (let attemptsRemaining = 2; attemptsRemaining > 0; attemptsRemaining -= 1) { try { - await adapter.rebootstrap(handle); + await sessions.rebootstrap(handle); return true; } catch (error) { if (!(error instanceof DevflowError)) { @@ -1692,8 +2139,27 @@ async function rebootstrapWithRetry( return false; } +async function resumeWithRetry( + sessions: SessionRuntime, + handle: { sessionId: string }, +): Promise { + let lastError: unknown; + for (let physicalAttempt = 0; physicalAttempt <= 2; physicalAttempt += 1) { + try { + return await sessions.resume(handle); + } catch (error) { + lastError = error; + if (!(error instanceof DevflowError) || error.class !== "recoverable") { + throw error; + } + } + } + + throw 
lastError; +} + async function markSessionFailedNeedsHuman( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, sessionId: string, ) { @@ -1710,6 +2176,43 @@ async function markSessionFailedNeedsHuman( }); } +async function markAllSessionsFailedInTransaction( + tx: TransactionDb, + eventRepository: RunEventRepository, + runId: string, +): Promise { + const sessions = await tx + .select({ id: tuiSessions.id, roleId: tuiSessions.roleId }) + .from(tuiSessions) + .where(eq(tuiSessions.runId, runId)); + if (sessions.length === 0) { + return []; + } + + await tx + .update(tuiSessions) + .set({ state: "FAILED_NEEDS_HUMAN" }) + .where(eq(tuiSessions.runId, runId)); + for (const session of sessions) { + await eventRepository.appendInTransaction(tx, { + runId, + type: "session.failed", + payload: { sessionId: session.id, roleId: session.roleId }, + idempotencyKey: `session.failed:${session.id}`, + }); + } + + return sessions.map((session) => session.id); +} + +async function disposeSessionIds(sessions: SessionRuntime, sessionIds: readonly string[]) { + await Promise.all( + [...new Set(sessionIds)].map((sessionId) => + sessions.dispose({ sessionId }).catch(() => undefined), + ), + ); +} + async function waitForArtifact(path: string, options: ArtifactWaitOptions = {}): Promise { const timeoutMs = options.timeoutMs ?? 5_000; const pollIntervalMs = options.pollIntervalMs ?? 
25; @@ -1794,7 +2297,7 @@ function parseArtifactJson(bytes: Buffer): unknown { } } -async function readArtifactBytes(input: RunSingleFakePhaseInput): Promise { +async function readArtifactBytes(input: CanonicalRunSingleFakePhaseInput): Promise { try { return await readFile(input.expectedArtifactPath); } catch (cause) { @@ -1829,7 +2332,7 @@ interface HumanGateRequest { } async function ensureHumanGateRequestInTransaction( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, tx: TransactionDb, gateKey: string, attempt: number, @@ -1886,7 +2389,7 @@ async function ensureHumanGateRequestInTransaction( } async function appendHumanGateRequestedEventInTransaction( - input: RunSingleFakePhaseInput, + input: CanonicalRunSingleFakePhaseInput, eventRepository: RunEventRepository, tx: TransactionDb, request: HumanGateRequest, @@ -1924,6 +2427,10 @@ function isDevflowErrorWithCode(error: unknown, code: string): error is DevflowE return error instanceof DevflowError && error.code === code; } +function isRunStateChanged(error: unknown): error is DevflowError { + return isDevflowErrorWithCode(error, "run_state_changed"); +} + function shouldCreateHumanGate(error: unknown): error is DevflowError { return error instanceof DevflowError && error.class !== "fatal"; } @@ -1973,7 +2480,7 @@ function toArtifactTimeoutRecoveryGateError(error: DevflowError): DevflowError { return new DevflowError("Artifact timeout recovery exhausted retry budget", options); } -async function removeStaleArtifact(input: RunSingleFakePhaseInput): Promise { +async function removeStaleArtifact(input: CanonicalRunSingleFakePhaseInput): Promise { try { await unlink(input.expectedArtifactPath); } catch (cause) { @@ -1991,7 +2498,10 @@ async function removeStaleArtifact(input: RunSingleFakePhaseInput): Promise = Record>( + text: string, + values?: readonly unknown[], + ): Promise<{ rows: T[] }>; + release(): void; +} + +export interface SessionRuntime { + trackOperation(operation: Promise): 
Promise; + start(input: StartInput): Promise; + sendPrompt(handle: SessionHandle, envelope: PromptEnvelope): Promise<{ promptId: string }>; + probe(handle: SessionHandle): Promise; + resume(handle: SessionHandle): Promise; + rebootstrap(handle: SessionHandle): Promise; + capture(handle: SessionHandle, fromSeq: bigint): AsyncIterable; + dispose(handle: SessionHandle): Promise; +} + +export interface SessionManagerOptions { + adapter: SessionAdapter; + db?: Database; + dbClient?: DbClient; + recoveryRunIds?: readonly string[]; + shutdownDrainMs?: number; +} + +export interface SessionManagerRecoveryResult { + recoveredSessionIds: string[]; + failedSessionIds: string[]; +} + +export class SessionManager implements SessionRuntime { + private readonly adapter: SessionAdapter; + private readonly db: Database | undefined; + private readonly dbClient: DbClient | undefined; + private readonly recoveryRunIds: readonly string[] | undefined; + private readonly shutdownDrainMs: number; + private readonly handles = new Map(); + private readonly inFlight = new Set>(); + private lockClient: AdvisoryLockClient | undefined; + private draining = false; + + constructor(options: SessionManagerOptions) { + this.adapter = options.adapter; + this.db = options.dbClient?.db ?? options.db; + this.dbClient = options.dbClient; + this.recoveryRunIds = options.recoveryRunIds; + this.shutdownDrainMs = options.shutdownDrainMs ?? 
30_000; + } + + async initialize(): Promise { + await this.acquireLock(); + try { + return await this.recoverSessions(); + } catch (error) { + await this.shutdown(); + throw error; + } + } + + async acquireLock(): Promise { + if (this.dbClient === undefined) { + throw new DevflowError("SessionManager requires a DbClient for singleton startup", { + class: "fatal", + code: "internal_state_corruption", + }); + } + if (this.lockClient !== undefined) { + return; + } + + const client = (await this.dbClient.pool.connect()) as AdvisoryLockClient; + const result = await client.query<{ acquired: boolean }>( + "SELECT pg_try_advisory_lock(hashtext($1)) AS acquired", + ["devflow:session-manager"], + ); + if (result.rows[0]?.acquired !== true) { + client.release(); + throw new DevflowError("another session manager is running", { + class: "human_required", + code: "session_manager_already_running", + recoveryHint: "exit_code=3", + }); + } + + this.lockClient = client; + } + + async shutdown(): Promise { + this.draining = true; + await this.waitForInFlight(); + const client = this.lockClient; + this.lockClient = undefined; + this.handles.clear(); + if (client !== undefined) { + try { + await client.query("SELECT pg_advisory_unlock(hashtext($1))", ["devflow:session-manager"]); + } finally { + client.release(); + } + } + } + + trackOperation(operation: Promise): Promise { + return this.track(operation); + } + + async start(input: StartInput): Promise { + this.assertAcceptingPrompts(); + const handle = await this.track(this.adapter.start(input)); + this.handles.set(handle.sessionId, handle); + return handle; + } + + async sendPrompt(handle: SessionHandle, envelope: PromptEnvelope): Promise<{ promptId: string }> { + this.assertAcceptingPrompts(); + return this.track(this.adapter.sendPrompt(this.handleFor(handle), envelope)); + } + + async probe(handle: SessionHandle): Promise { + return this.track(this.adapter.probe(this.handleFor(handle))); + } + + async resume(handle: 
SessionHandle): Promise { + this.assertAcceptingPrompts(); + const resumed = await this.track(this.adapter.resume(this.handleFor(handle))); + this.handles.set(resumed.sessionId, resumed); + return resumed; + } + + async rebootstrap(handle: SessionHandle): Promise { + this.assertAcceptingPrompts(); + const rebootstrapped = await this.track(this.adapter.rebootstrap(this.handleFor(handle))); + this.handles.set(rebootstrapped.sessionId, rebootstrapped); + return rebootstrapped; + } + + async *capture(handle: SessionHandle, fromSeq: bigint): AsyncIterable { + const finishTracking = this.beginTrackedOperation(); + try { + for await (const chunk of this.adapter.capture(this.handleFor(handle), fromSeq)) { + yield chunk; + } + } finally { + finishTracking(); + } + } + + async dispose(handle: SessionHandle): Promise { + const resolvedHandle = this.handleFor(handle); + await this.track(this.adapter.dispose(resolvedHandle)); + this.handles.delete(resolvedHandle.sessionId); + } + + async recoverSessions(): Promise { + if (this.db === undefined) { + return { recoveredSessionIds: [], failedSessionIds: [] }; + } + + const sessionRows = await this.db + .select({ + id: tuiSessions.id, + runId: tuiSessions.runId, + roleId: tuiSessions.roleId, + backend: tuiSessions.backend, + cwd: tuiSessions.cwd, + lastKnownPanePid: tuiSessions.lastKnownPanePid, + recoveryAttempts: tuiSessions.recoveryAttempts, + state: tuiSessions.state, + tmuxSession: tuiSessions.tmuxSession, + tmuxWindow: tuiSessions.tmuxWindow, + }) + .from(tuiSessions) + .innerJoin(runs, eq(tuiSessions.runId, runs.id)) + .where( + this.recoveryRunIds === undefined + ? 
and( + ne(tuiSessions.state, "FAILED_NEEDS_HUMAN"), + notInArray(runs.state, [...terminalRunStates]), + ) + : and( + ne(tuiSessions.state, "FAILED_NEEDS_HUMAN"), + notInArray(runs.state, [...terminalRunStates]), + inArray(tuiSessions.runId, [...this.recoveryRunIds]), + ), + ); + + const recoveredSessionIds: string[] = []; + const failedSessionIds: string[] = []; + for (const session of sessionRows) { + const handle = compactHandle( + session.id, + session.lastKnownPanePid, + session.tmuxSession, + session.tmuxWindow, + ); + try { + const resumed = await this.resumeWithRetry(handle); + this.handles.set(resumed.sessionId, resumed); + recoveredSessionIds.push(resumed.sessionId); + } catch (error) { + await this.markRecoveryFailed(session, error); + failedSessionIds.push(session.id); + } + } + + return { recoveredSessionIds, failedSessionIds }; + } + + private async markRecoveryFailed( + session: { + id: string; + runId: string; + roleId: string; + backend: string; + cwd: string; + recoveryAttempts: number; + }, + error: unknown, + ): Promise { + if (this.db === undefined) { + return; + } + + const eventRepository = new RunEventRepository(this.db); + const recoveryAttempts = session.recoveryAttempts + 1; + const gateKey = "session_recovery_required"; + const approvalIdempotencyKey = `${session.runId}:${gateKey}:${session.id}:${recoveryAttempts}`; + const pauseCause = `session_recovery_failed:${session.id}:${recoveryAttempts}`; + await this.db.transaction(async (tx) => { + await tx.execute(sql`SELECT 1 FROM ${runs} WHERE ${runs.id} = ${session.runId} FOR UPDATE`); + const [run] = await tx + .select({ state: runs.state }) + .from(runs) + .where(eq(runs.id, session.runId)) + .limit(1); + await tx + .update(tuiSessions) + .set({ state: "FAILED_NEEDS_HUMAN", recoveryAttempts }) + .where(eq(tuiSessions.id, session.id)); + await eventRepository.appendInTransaction(tx, { + runId: session.runId, + type: "session.failed", + payload: { sessionId: session.id, roleId: 
session.roleId }, + idempotencyKey: `session.failed:${session.id}`, + }); + if (run === undefined || isTerminalRunState(run.state)) { + return; + } + const inserted = await tx + .insert(approvalRequests) + .values({ + runId: session.runId, + gateKey, + state: "pending", + idempotencyKey: approvalIdempotencyKey, + payload: { + sessionId: session.id, + roleId: session.roleId, + backend: session.backend, + cwd: session.cwd, + recoveryHint: recoveryHintFor(error), + }, + }) + .onConflictDoNothing({ target: approvalRequests.idempotencyKey }) + .returning({ id: approvalRequests.id, idempotencyKey: approvalRequests.idempotencyKey }); + if (run.state !== "paused") { + await tx + .update(runs) + .set({ state: "paused", pausedFromState: run.state, updatedAt: new Date() }) + .where(eq(runs.id, session.runId)); + await eventRepository.appendInTransaction(tx, { + runId: session.runId, + type: "run.paused", + payload: { cause: pauseCause, pausedFromState: run.state }, + idempotencyKey: `run.paused:${session.runId}:${pauseCause}`, + }); + } + const request = + inserted[0] ?? 
+ ( + await tx + .select({ id: approvalRequests.id, idempotencyKey: approvalRequests.idempotencyKey }) + .from(approvalRequests) + .where(eq(approvalRequests.idempotencyKey, approvalIdempotencyKey)) + .limit(1) + )[0]; + if (request !== undefined) { + await eventRepository.appendInTransaction(tx, { + runId: session.runId, + type: "approval.requested", + payload: { + approvalRequestId: request.id, + approvalIdempotencyKey: request.idempotencyKey, + gateKey, + }, + idempotencyKey: `approval.requested:${request.idempotencyKey}`, + }); + } + }); + } + + private async resumeWithRetry(handle: SessionHandle): Promise { + let lastError: unknown; + for (let attempt = 0; attempt <= 2; attempt += 1) { + try { + return await this.track(this.adapter.resume(handle)); + } catch (error) { + lastError = error; + if (!(error instanceof DevflowError) || error.class !== "recoverable") { + throw error; + } + } + } + throw lastError; + } + + private async track(operation: Promise): Promise { + const tracked = operation.finally(() => { + this.inFlight.delete(tracked); + }); + this.inFlight.add(tracked); + return tracked; + } + + private beginTrackedOperation(): () => void { + let finishOperation!: () => void; + const tracked = new Promise((resolve) => { + finishOperation = resolve; + }).finally(() => { + this.inFlight.delete(tracked); + }); + this.inFlight.add(tracked); + return finishOperation; + } + + private async waitForInFlight(): Promise { + if (this.inFlight.size === 0) { + return; + } + await Promise.race([ + Promise.allSettled([...this.inFlight]), + new Promise((resolveWait) => setTimeout(resolveWait, this.shutdownDrainMs)), + ]); + } + + private handleFor(handle: SessionHandle): SessionHandle { + return this.handles.get(handle.sessionId) ?? 
handle; + } + + private assertAcceptingPrompts(): void { + if (this.draining) { + throw new DevflowError("SessionManager is draining", { + class: "human_required", + code: "session_manager_draining", + }); + } + } +} + +const terminalRunStates = ["completed", "failed", "aborted"] as const; + +function isTerminalRunState(state: string): state is (typeof terminalRunStates)[number] { + return terminalRunStates.includes(state as (typeof terminalRunStates)[number]); +} + +function compactHandle( + sessionId: string, + pid: number | null, + tmuxSession: string | null, + tmuxWindow: string | null, +): SessionHandle { + return { + sessionId, + ...(pid === null ? {} : { pid }), + ...(tmuxSession === null ? {} : { tmuxSession }), + ...(tmuxWindow === null ? {} : { tmuxWindow }), + }; +} + +function recoveryHintFor(error: unknown): string { + if (error instanceof DevflowError && error.recoveryHint !== undefined) { + return error.recoveryHint; + } + if (error instanceof Error) { + return error.message; + } + return "session resume failed"; +} diff --git a/packages/session/tsconfig.json b/packages/session/tsconfig.json index b9b6c01..704cb37 100644 --- a/packages/session/tsconfig.json +++ b/packages/session/tsconfig.json @@ -6,5 +6,5 @@ "types": ["node", "vitest"] }, "include": ["src/**/*.ts"], - "references": [{ "path": "../core" }] + "references": [{ "path": "../core" }, { "path": "../db" }] } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 91325d7..4794918 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -58,6 +58,21 @@ importers: specifier: 2.1.8 version: 2.1.8(@types/node@22.10.2) + apps/api: + dependencies: + '@devflow/core': + specifier: workspace:* + version: link:../../packages/core + '@devflow/db': + specifier: workspace:* + version: link:../../packages/db + '@devflow/run-engine': + specifier: workspace:* + version: link:../../packages/run-engine + '@devflow/session': + specifier: workspace:* + version: link:../../packages/session + apps/cli: dependencies: 
commander: @@ -120,7 +135,6 @@ importers: '@devflow/core': specifier: workspace:* version: link:../core - devDependencies: '@devflow/db': specifier: workspace:* version: link:../db diff --git a/tests/fixtures/fake-artifacts/dev/phase-plan@1/colliding-spec.json b/tests/fixtures/fake-artifacts/dev/phase-plan@1/colliding-spec.json new file mode 100644 index 0000000..ba34e28 --- /dev/null +++ b/tests/fixtures/fake-artifacts/dev/phase-plan@1/colliding-spec.json @@ -0,0 +1,24 @@ +{ + "phases": [ + { + "key": "spec", + "title": "Colliding planned phase", + "objective": "Prove planned phases cannot reuse template phase keys.", + "roles": ["spec_writer"], + "expectedArtifact": { + "path": "artifacts/colliding-spec.json", + "schema": "dev/spec@1" + }, + "gates": [], + "tasks": [ + { + "id": "TASK-1", + "title": "Colliding task", + "role": "spec_writer", + "writeSet": ["src/**"], + "dependsOn": [] + } + ] + } + ] +} diff --git a/tests/fixtures/fake-artifacts/dev/phase-plan@1/mixed-unbound-role.json b/tests/fixtures/fake-artifacts/dev/phase-plan@1/mixed-unbound-role.json new file mode 100644 index 0000000..8f4f82f --- /dev/null +++ b/tests/fixtures/fake-artifacts/dev/phase-plan@1/mixed-unbound-role.json @@ -0,0 +1,24 @@ +{ + "phases": [ + { + "key": "mixed-unbound", + "title": "Mixed bound and unbound planned phase", + "objective": "Prove every planned phase role is validated before insertion.", + "roles": ["spec_writer", "missing_role"], + "expectedArtifact": { + "path": "artifacts/mixed-unbound-spec.json", + "schema": "dev/spec@1" + }, + "gates": [], + "tasks": [ + { + "id": "TASK-1", + "title": "Impossible mixed-role task", + "role": "missing_role", + "writeSet": ["src/**"], + "dependsOn": [] + } + ] + } + ] +} diff --git a/tests/fixtures/fake-artifacts/dev/phase-plan@1/ok.json b/tests/fixtures/fake-artifacts/dev/phase-plan@1/ok.json new file mode 100644 index 0000000..a22abd3 --- /dev/null +++ b/tests/fixtures/fake-artifacts/dev/phase-plan@1/ok.json @@ -0,0 +1,24 @@ +{ + 
"phases": [ + { + "key": "implement", + "title": "Implement requested change", + "objective": "Use the fake development run to prove the engine path.", + "roles": ["spec_writer"], + "expectedArtifact": { + "path": "artifacts/implementation-spec.json", + "schema": "dev/spec@1" + }, + "gates": [], + "tasks": [ + { + "id": "TASK-1", + "title": "Apply implementation", + "role": "spec_writer", + "writeSet": ["src/**"], + "dependsOn": [] + } + ] + } + ] +} diff --git a/tests/fixtures/fake-artifacts/dev/phase-plan@1/skip-only.json b/tests/fixtures/fake-artifacts/dev/phase-plan@1/skip-only.json new file mode 100644 index 0000000..b35cfea --- /dev/null +++ b/tests/fixtures/fake-artifacts/dev/phase-plan@1/skip-only.json @@ -0,0 +1,11 @@ +{ + "phases": [ + { + "key": "documentation-note", + "title": "Documentation Note", + "objective": "Record that this planned phase has no artifact and should be skipped.", + "roles": ["phase_planner"], + "gates": [] + } + ] +} diff --git a/tests/fixtures/fake-artifacts/dev/phase-plan@1/two-phases.json b/tests/fixtures/fake-artifacts/dev/phase-plan@1/two-phases.json new file mode 100644 index 0000000..bf269cf --- /dev/null +++ b/tests/fixtures/fake-artifacts/dev/phase-plan@1/two-phases.json @@ -0,0 +1,44 @@ +{ + "phases": [ + { + "key": "implement-a", + "title": "Implement first fake change", + "objective": "First planned phase for replay serialization tests.", + "roles": ["spec_writer"], + "expectedArtifact": { + "path": "artifacts/implementation-a-spec.json", + "schema": "dev/spec@1" + }, + "gates": [], + "tasks": [ + { + "id": "TASK-A", + "title": "Apply first implementation", + "role": "spec_writer", + "writeSet": ["src/a/**"], + "dependsOn": [] + } + ] + }, + { + "key": "implement-b", + "title": "Implement second fake change", + "objective": "Second planned phase for replay serialization tests.", + "roles": ["spec_writer"], + "expectedArtifact": { + "path": "artifacts/implementation-b-spec.json", + "schema": "dev/spec@1" + }, + "gates": 
[], + "tasks": [ + { + "id": "TASK-B", + "title": "Apply second implementation", + "role": "spec_writer", + "writeSet": ["src/b/**"], + "dependsOn": ["TASK-A"] + } + ] + } + ] +} diff --git a/tests/fixtures/fake-artifacts/dev/phase-plan@1/unbound-role.json b/tests/fixtures/fake-artifacts/dev/phase-plan@1/unbound-role.json new file mode 100644 index 0000000..3b1d3de --- /dev/null +++ b/tests/fixtures/fake-artifacts/dev/phase-plan@1/unbound-role.json @@ -0,0 +1,24 @@ +{ + "phases": [ + { + "key": "unbound", + "title": "Unbound planned phase", + "objective": "Prove approval-triggered advancement fails terminally on invalid plan roles.", + "roles": ["missing_role"], + "expectedArtifact": { + "path": "artifacts/unbound-spec.json", + "schema": "dev/spec@1" + }, + "gates": [], + "tasks": [ + { + "id": "TASK-1", + "title": "Impossible task", + "role": "missing_role", + "writeSet": ["src/**"], + "dependsOn": [] + } + ] + } + ] +} diff --git a/tests/fixtures/fake-artifacts/dev/phase-plan@1/unknown-schema.json b/tests/fixtures/fake-artifacts/dev/phase-plan@1/unknown-schema.json new file mode 100644 index 0000000..bb37aa1 --- /dev/null +++ b/tests/fixtures/fake-artifacts/dev/phase-plan@1/unknown-schema.json @@ -0,0 +1,24 @@ +{ + "phases": [ + { + "key": "unknown-schema", + "title": "Unknown schema planned phase", + "objective": "Prove fatal planned-phase failures clean up every session.", + "roles": ["spec_writer"], + "expectedArtifact": { + "path": "artifacts/unknown-schema.json", + "schema": "dev/unknown-schema@1" + }, + "gates": [], + "tasks": [ + { + "id": "TASK-1", + "title": "Write unknown schema artifact", + "role": "spec_writer", + "writeSet": ["src/**"], + "dependsOn": [] + } + ] + } + ] +} diff --git a/tsconfig.json b/tsconfig.json index 7d87fab..b6f81a3 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -10,6 +10,7 @@ { "path": "./packages/db" }, { "path": "./packages/run-engine" }, { "path": "./packages/session" }, + { "path": "./apps/api" }, { "path": "./apps/cli" 
} ] } diff --git a/vitest.workspace.ts b/vitest.workspace.ts index cf7cfee..0f1cc43 100644 --- a/vitest.workspace.ts +++ b/vitest.workspace.ts @@ -27,5 +27,6 @@ export default defineWorkspace([ nodeProject("packages/core", ["packages/core/src/**/*.test.ts"]), nodeProject("packages/session", ["packages/session/src/**/*.test.ts"]), nodeProject("packages/run-engine", ["packages/run-engine/src/**/*.test.ts"]), + nodeProject("apps/api", ["apps/api/src/**/*.test.ts"]), nodeProject("apps/cli", ["apps/cli/src/**/*.test.ts"]), ]);