From 64efeabd33e15ff74d5b1b99dafe8ba66a6333ca Mon Sep 17 00:00:00 2001 From: chungyeong Date: Sun, 10 May 2026 16:48:52 +0900 Subject: [PATCH] feat: add fake phase harness --- apps/cli/package.json | 2 +- docs/plan.md | 35 +- packages/core/package.json | 4 +- packages/core/src/run-event.test.ts | 86 +- packages/core/src/run-event.ts | 149 +- packages/db/package.json | 4 +- packages/db/src/index.ts | 1 + packages/db/src/repositories/run-event.ts | 278 ++ packages/run-engine/package.json | 20 + .../run-engine/src/fake-phase-harness.test.ts | 3059 +++++++++++++++++ packages/run-engine/src/fake-phase-harness.ts | 2022 +++++++++++ packages/run-engine/src/index.ts | 2 + .../run-engine/src/run-event-repository.ts | 1 + packages/run-engine/tsconfig.build.json | 10 + packages/run-engine/tsconfig.json | 10 + packages/session/package.json | 2 +- packages/session/src/fake.test.ts | 53 +- packages/session/src/fake.ts | 25 +- pnpm-lock.yaml | 15 + tsconfig.json | 1 + tsconfig.typecheck.json | 1 + vitest.workspace.ts | 62 +- 22 files changed, 5766 insertions(+), 76 deletions(-) create mode 100644 packages/db/src/repositories/run-event.ts create mode 100644 packages/run-engine/package.json create mode 100644 packages/run-engine/src/fake-phase-harness.test.ts create mode 100644 packages/run-engine/src/fake-phase-harness.ts create mode 100644 packages/run-engine/src/index.ts create mode 100644 packages/run-engine/src/run-event-repository.ts create mode 100644 packages/run-engine/tsconfig.build.json create mode 100644 packages/run-engine/tsconfig.json diff --git a/apps/cli/package.json b/apps/cli/package.json index efb5934..ac9684a 100644 --- a/apps/cli/package.json +++ b/apps/cli/package.json @@ -9,7 +9,7 @@ "scripts": { "build": "tsup src/index.ts --format esm --clean", "typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit", - "test": "vitest run" + "test": "cd ../.. 
&& vitest run --project apps/cli" }, "dependencies": { "commander": "12.1.0", diff --git a/docs/plan.md b/docs/plan.md index 95317ef..ba37e4c 100644 --- a/docs/plan.md +++ b/docs/plan.md @@ -1,4 +1,4 @@ -# Devflow Implementation Plan v3 r4 +# Devflow Implementation Plan v3 r9 ## 0. Document Status @@ -11,6 +11,11 @@ - r2 applied CC-6 through CC-10. - r3 applied CC-11 through CC-15. - r4 applies CC-16 through CC-18. +- r5 applies CC-19. +- r6 applies CC-20. +- r7 applies CC-21 through CC-23. +- r8 applies CC-24 through CC-26. +- r9 applies CC-27 through CC-28. ## 1. Stack Decisions @@ -40,7 +45,7 @@ - Pre-commit: - `lefthook`. - Runs `biome check --write` on staged files. - - Runs `tsc -b --noEmit` on changed packages. + - Runs `tsc -p tsconfig.typecheck.json --noEmit`. - Runs related Vitest tests on changed packages. ### 1.3 Database @@ -819,6 +824,7 @@ export interface TranscriptChunk { ### 8.3 Recovery Counters - `sendPrompt` retry: 2. + - Means one initial send plus two adapter-level retries, three physical send attempts max. - `resume` retry: 2. - `rebootstrap` retry: 1. - artifact repair retry: 1. @@ -882,7 +888,7 @@ const PromptEnvelope = z.object({ ### 9.3 Rules - Prompt identity is `dedupKey`. -- Adapter refuses duplicate `dedupKey` for the same session within a run lifetime. +- Adapter treats duplicate `dedupKey` for the same session within a run lifetime as idempotent success and does not reprocess the prompt. - `attempt` increments only when the engine intentionally re-sends after timeout or repair. - Adapter-level retry does not increment attempt. - Completion is never inferred from transcript text. 
@@ -1152,7 +1158,7 @@ Transitions: | `awaiting_approval` | request_changes | `planning` | increment phase attempts | | `awaiting_approval` | timeout | `paused` | set `paused_from_state='awaiting_approval'` | | `executing` | phase ok, more phases | `executing` | next phase | -| `executing` | phase needs gate | `awaiting_approval` | request gate | +| `executing` | normal workflow approval gate | `awaiting_approval` | request gate | | `executing` | all phases done | `completed` | emit `run.completed`, write final report | | `executing` | unrecoverable error | `failed` | emit `run.failed` | | `executing` | manual `pauseRun` | `paused` | set `paused_from_state='executing'` | @@ -1196,6 +1202,14 @@ Transitions: | `awaiting_approval` | reject / abort | `failed` | | `awaiting_approval` | request_changes | `running`, attempt + 1 | +Replay rules: + +- `phase.started.payload.repair === true` marks that attempt as the single allowed repair attempt. Replaying that attempt MUST use repair instructions, `prompt.repaired`, and must not start a third attempt. +- Repair replay from `running` may reuse an existing `READY` / bootstrapped session even if `last_prompt_hash` still contains the previous attempt's prompt hash; current-attempt prompt send has not happened yet. +- If phase state is `validating` and no artifact row exists yet, replay re-reads and validates the current `expectedArtifactPath` instead of treating the state as corruption. +- If phase state is `validating` and artifact rows already exist for the same phase/path/schema, replay may reuse only an artifact row created at or after the current session `last_prompt_at`; older rows are treated as stale previous-attempt outputs and the file is revalidated. +- Session bootstrap DB row/state changes and `session.created` / `session.ready` events are written in one DB transaction after adapter start succeeds. + ## 14. 
Approval State States: @@ -1463,6 +1477,7 @@ Human required: - `artifact_invalid_after_repair` - `artifact_timeout_exhausted` +- `prompt_send_exhausted` - `destructive_command_blocked` - `secret_access_blocked` - `backend_unavailable` @@ -1486,7 +1501,7 @@ Fatal: Mapping: - recoverable -> retry; exhausted -> human_required. -- human_required -> run paused and gate created. +- human_required / recovery gate -> run paused and gate created. This is distinct from normal workflow approval gates in §13.1, which use `awaiting_approval`. - fatal -> run failed, sessions disposed, final report best-effort. ## 19. Concurrent Runs and Crash Recovery @@ -1721,6 +1736,16 @@ M5+: | CC-16 | Prompt hash used phaseId but envelope uses phaseKey | prompt hash uses phaseKey | | CC-17 | abortRun transition too narrow | abort from any non-terminal run state | | CC-18 | approval pending transition wording conflicted with pause epoch | pending can transition once per pending epoch; paused may unpause to pending | +| CC-19 | `tsc -b --noEmit` is brittle with TypeScript 5.6 project references on clean worktrees | build still uses `tsc -b`; no-emit verification uses root `tsconfig.typecheck.json` | +| CC-20 | `sendPrompt` retry count was ambiguous against Temporal activity attempts | §8.3 now states retry budget means initial attempt plus retries; §15.2 remains Temporal-level attempts only | +| CC-21 | Duplicate prompt dedup handling conflicted with adapter retry idempotency | duplicate `dedupKey` returns idempotent success without reprocessing | +| CC-22 | Normal workflow approval gates and human-required recovery gates were easy to conflate | §13.1 names normal workflow gates; §18 keeps human_required recovery gates paused | +| CC-23 | Phase start and event append could diverge under retry/error | phase start and `phase.started` append occur in one DB transaction | +| CC-24 | Repair attempt replay lost repair prompt identity and one-repair budget | repair attempts are derived from 
`phase.started.payload.repair`, replay uses repair instructions and `prompt.repaired`, and cannot start attempt 3 | +| CC-25 | `validating` replay failed if crash happened before artifact row insert | replay revalidates the expected artifact file when state is `validating` but no artifact row exists | +| CC-26 | Session bootstrap state/events could diverge | session row/state and `session.created` / `session.ready` events are committed in one DB transaction | +| CC-27 | `validating` replay could reuse stale previous-attempt artifact rows | artifact-row replay requires `artifact.created_at >= tui_sessions.last_prompt_at`; otherwise the file is revalidated | +| CC-28 | repair `running` replay rejected existing READY sessions with previous attempt prompt hash | current-attempt repair prompt is considered unsent, so replay may reuse the session and send `prompt.repaired` | ### Future Open Questions diff --git a/packages/core/package.json b/packages/core/package.json index e40720b..a503c96 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -8,8 +8,8 @@ "types": "./dist/index.d.ts", "scripts": { "build": "tsup src/index.ts --format esm,cjs --clean && tsc -p tsconfig.build.json", - "typecheck": "tsc -b --noEmit", - "test": "vitest run" + "typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit", + "test": "cd ../.. 
&& vitest run --project packages/core" }, "dependencies": { "ajv": "8.17.1", diff --git a/packages/core/src/run-event.test.ts b/packages/core/src/run-event.test.ts index 623dead..4faebc4 100644 --- a/packages/core/src/run-event.test.ts +++ b/packages/core/src/run-event.test.ts @@ -1,9 +1,93 @@ import { describe, expect, it } from "vitest"; -import { RunEventPayloadSchemas, RunEventTypeValues } from "./run-event.js"; +import { RunEvent, RunEventPayloadSchemas, RunEventTypeValues } from "./run-event.js"; describe("run events", () => { it("keeps a payload schema for every closed run event type", () => { expect(Object.keys(RunEventPayloadSchemas).sort()).toEqual([...RunEventTypeValues].sort()); }); + + it("rejects malformed payloads for structured event families", () => { + expect( + RunEventPayloadSchemas["prompt.sent"].safeParse({ + roleId: "implementer", + }).success, + ).toBe(false); + expect( + RunEventPayloadSchemas["artifact.validated"].safeParse({ + artifactId: "not-a-uuid", + hash: "not-a-sha", + path: "/tmp/spec.json", + schemaId: "dev/spec@1", + }).success, + ).toBe(false); + expect( + RunEventPayloadSchemas["run.paused"].safeParse({ + cause: "human_required:artifact_repair_failed", + }).success, + ).toBe(false); + expect(RunEventPayloadSchemas["run.resumed"].safeParse({}).success).toBe(false); + expect( + RunEventPayloadSchemas["approval.resolved"].safeParse({ + action: "pause", + approvalRequestId: "00000000-0000-4000-8000-000000000000", + }).success, + ).toBe(false); + expect( + RunEventPayloadSchemas["approval.resolved"].safeParse({ + action: "approve", + approvalRequestId: "00000000-0000-4000-8000-000000000000", + }).success, + ).toBe(true); + expect( + RunEventPayloadSchemas["session.ready"].safeParse({ + roleId: "implementer", + sessionId: "00000000-0000-4000-8000-000000000000", + }).success, + ).toBe(false); + expect( + RunEventPayloadSchemas["session.failed"].safeParse({ + sessionId: "00000000-0000-4000-8000-000000000000", + }).success, + 
).toBe(false); + expect( + RunEventPayloadSchemas["phase.started"].safeParse({ + attempt: 0, + phaseKey: "implement", + }).success, + ).toBe(false); + expect( + RunEventPayloadSchemas["artifact.expected"].safeParse({ + attempt: 0, + path: "/tmp/spec.json", + schemaId: "dev/spec@1", + }).success, + ).toBe(false); + expect(RunEventPayloadSchemas["phase.skipped"].safeParse({}).success).toBe(false); + expect( + RunEventPayloadSchemas["review.batch_recorded"].safeParse({ + attempt: 0, + reviewerRole: "reviewer", + }).success, + ).toBe(false); + }); + + it("binds exported RunEvent validation to each event type payload schema", () => { + expect( + RunEvent.safeParse({ + type: "session.ready", + payload: {}, + }).success, + ).toBe(false); + expect( + RunEvent.safeParse({ + type: "session.ready", + payload: { + recoveryAttempts: 0, + roleId: "implementer", + sessionId: "00000000-0000-4000-8000-000000000000", + }, + }).success, + ).toBe(true); + }); }); diff --git a/packages/core/src/run-event.ts b/packages/core/src/run-event.ts index 4a79a8c..f10cc85 100644 --- a/packages/core/src/run-event.ts +++ b/packages/core/src/run-event.ts @@ -1,5 +1,7 @@ import { z } from "zod"; +import { ApprovalDecisionAction } from "./enums.js"; + export const RunEventTypeValues = [ "run.created", "run.started", @@ -40,15 +42,146 @@ export const RunEventTypeValues = [ export const RunEventType = z.enum(RunEventTypeValues); export type RunEventType = z.infer; -const payloadSchema = z.record(z.unknown()); +const uuid = z.string().uuid(); +const sha256 = z.string().regex(/^[a-f0-9]{64}$/); +const nonEmptyString = z.string().min(1); +const phaseAttempt = z.number().int().positive(); -export const RunEventPayloadSchemas = Object.freeze( - Object.fromEntries(RunEventTypeValues.map((type) => [type, payloadSchema])), -) as Readonly>; - -export const RunEvent = z.object({ - type: RunEventType, - payload: payloadSchema, +const looseObject = z.object({}).passthrough(); +const phasePayload = z + .object({ + 
phaseKey: nonEmptyString, + attempt: phaseAttempt, + }) + .passthrough(); +const promptPayload = z + .object({ + roleId: nonEmptyString, + dedupKey: sha256, + }) + .passthrough(); +const artifactWaitPayload = z + .object({ + path: nonEmptyString, + schemaId: nonEmptyString, + attempt: phaseAttempt, + }) + .passthrough(); +const artifactValidationPayload = z + .object({ + artifactId: uuid, + hash: sha256, + path: nonEmptyString, + schemaId: nonEmptyString, + }) + .passthrough(); +const sessionBasePayload = z + .object({ + sessionId: uuid, + roleId: nonEmptyString, + }) + .passthrough(); +const sessionPromptPayload = sessionBasePayload.extend({ + dedupKey: sha256, }); +const sessionRecoveryPayload = sessionBasePayload.extend({ + recoveryAttempts: z.number().int().nonnegative(), +}); +const approvalRequestedPayload = z + .object({ + approvalRequestId: uuid, + approvalIdempotencyKey: nonEmptyString, + gateKey: nonEmptyString, + }) + .passthrough(); +const approvalResolvedPayload = z + .object({ + action: ApprovalDecisionAction, + approvalRequestId: uuid, + }) + .passthrough(); +const commandPayload = z + .object({ + commandId: uuid, + }) + .passthrough(); +const findingVerifierResolvedPayload = z + .object({ + findingId: uuid, + }) + .passthrough(); +const backtestIterationPayload = z + .object({ + iterationId: uuid, + }) + .passthrough(); +const reviewBatchRecordedPayload = z + .object({ + reviewerRole: nonEmptyString, + attempt: phaseAttempt, + }) + .passthrough(); +const runPausedPayload = z + .object({ + cause: nonEmptyString, + pausedFromState: nonEmptyString, + }) + .passthrough(); + +export const RunEventPayloadSchemas = Object.freeze({ + "run.created": looseObject, + "run.started": looseObject, + "run.paused": runPausedPayload, + "run.resumed": runPausedPayload.pick({ cause: true }).passthrough(), + "run.completed": looseObject, + "run.failed": looseObject, + "run.aborted": looseObject, + "phase.started": phasePayload, + "phase.completed": phasePayload, + 
"phase.failed": phasePayload.extend({ reason: nonEmptyString.optional() }), + "phase.skipped": phasePayload, + "prompt.sent": promptPayload, + "prompt.repaired": promptPayload, + "artifact.expected": artifactWaitPayload, + "artifact.validated": artifactValidationPayload, + "artifact.invalid": artifactValidationPayload.extend({ errors: z.array(z.unknown()) }), + "artifact.timeout": artifactWaitPayload, + "approval.requested": approvalRequestedPayload, + "approval.resolved": approvalResolvedPayload, + "session.created": sessionBasePayload.extend({ backend: nonEmptyString }), + "session.ready": sessionRecoveryPayload, + "session.busy": sessionPromptPayload, + "session.idle": sessionPromptPayload, + "session.crashed": sessionRecoveryPayload, + "session.recovered": sessionRecoveryPayload, + "session.failed": sessionBasePayload, + "command.started": commandPayload, + "command.completed": commandPayload, + "command.failed": commandPayload, + "review.batch_recorded": reviewBatchRecordedPayload, + "finding.verifier_resolved": findingVerifierResolvedPayload, + "backtest.iteration_started": backtestIterationPayload, + "backtest.iteration_completed": backtestIterationPayload, + "backtest.objective_evaluated": backtestIterationPayload, +} satisfies Record) as Readonly>; + +export const RunEvent = z + .object({ + type: RunEventType, + payload: z.unknown(), + }) + .superRefine((event, ctx) => { + const payload = RunEventPayloadSchemas[event.type].safeParse(event.payload); + if (payload.success) { + return; + } + + for (const issue of payload.error.issues) { + ctx.addIssue({ + ...issue, + path: ["payload", ...issue.path], + }); + } + }); export type RunEvent = z.infer; diff --git a/packages/db/package.json b/packages/db/package.json index 2a86eaf..140866e 100644 --- a/packages/db/package.json +++ b/packages/db/package.json @@ -8,8 +8,8 @@ "types": "./dist/index.d.ts", "scripts": { "build": "tsup src/index.ts --format esm,cjs --clean && tsc -p tsconfig.build.json", - "typecheck": 
"tsc -b --noEmit", - "test": "vitest run" + "typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit", + "test": "cd ../.. && vitest run --project packages/db" }, "dependencies": { "@devflow/core": "workspace:*", diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index be2c8b3..ceb40c2 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -1,3 +1,4 @@ export { createDbClient, type DbClient } from "./client.js"; +export * from "./repositories/run-event.js"; export * from "./repositories/transcript.js"; export * from "./schema/index.js"; diff --git a/packages/db/src/repositories/run-event.ts b/packages/db/src/repositories/run-event.ts new file mode 100644 index 0000000..78ef727 --- /dev/null +++ b/packages/db/src/repositories/run-event.ts @@ -0,0 +1,278 @@ +import { + DevflowError, + RunEventPayloadSchemas, + RunEventType, + type RunEventType as RunEventTypeName, + canonicalize, +} from "@devflow/core"; +import { and, desc, eq, sql } from "drizzle-orm"; + +import type { DbClient } from "../client.js"; +import { runEvents, runPhases } from "../schema/index.js"; + +type Database = DbClient["db"]; +type TransactionDatabase = Parameters[0]>[0]; + +export interface AppendRunEventInput { + runId: string; + phaseId?: string; + type: RunEventTypeName; + payload: Record; + idempotencyKey: string; +} + +export interface RunEventRow { + id: bigint; + runId: string; + phaseId: string | null; + seq: bigint; + type: string; + payload: unknown; + idempotencyKey: string; + ts: Date; +} + +export class RunEventRepository { + constructor(private readonly db: Database) {} + + async append(input: AppendRunEventInput): Promise { + return this.db.transaction(async (tx) => this.appendInTransaction(tx, input)); + } + + async appendInTransaction( + tx: TransactionDatabase, + input: AppendRunEventInput, + ): Promise { + const type = RunEventType.parse(input.type); + const payload = RunEventPayloadSchemas[type].parse(input.payload) as Record; + if 
(isPhaseScopedEvent(type) && input.phaseId === undefined) { + throw new DevflowError("Run event phase id is required for phase-scoped event", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + }); + } + if (input.idempotencyKey.length === 0) { + throw new DevflowError("Run event idempotency key is required", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + ...(input.phaseId === undefined ? {} : { phaseId: input.phaseId }), + }); + } + const expectedIdempotencyKey = expectedRunEventIdempotencyKey(input, type, payload); + if (expectedIdempotencyKey !== undefined && input.idempotencyKey !== expectedIdempotencyKey) { + throw new DevflowError("Run event idempotency key does not match event contract", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + ...(input.phaseId === undefined ? {} : { phaseId: input.phaseId }), + }); + } + + await tx.execute( + sql`SELECT pg_advisory_xact_lock(hashtext('devflow:run-events'), hashtext(${input.runId}))`, + ); + + if (input.phaseId !== undefined) { + const [phase] = await tx + .select({ id: runPhases.id }) + .from(runPhases) + .where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId))) + .limit(1); + if (phase === undefined) { + throw new DevflowError("Run event phase does not belong to run", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + phaseId: input.phaseId, + }); + } + } + + const existing = await tx + .select() + .from(runEvents) + .where( + and(eq(runEvents.runId, input.runId), eq(runEvents.idempotencyKey, input.idempotencyKey)), + ) + .limit(1); + + if (existing[0] !== undefined) { + assertIdempotentReplayMatches(input, type, payload, existing[0]); + return existing[0]; + } + + const latest = await tx + .select({ seq: runEvents.seq }) + .from(runEvents) + .where(eq(runEvents.runId, input.runId)) + .orderBy(desc(runEvents.seq)) + .limit(1); + const seq = (latest[0]?.seq ?? 
0n) + 1n; + + const inserted = await tx + .insert(runEvents) + .values({ + runId: input.runId, + phaseId: input.phaseId, + seq, + type, + payload, + idempotencyKey: input.idempotencyKey, + }) + .returning(); + + const event = inserted[0]; + if (event === undefined) { + throw new DevflowError("Run event insert returned no row", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + ...(input.phaseId === undefined ? {} : { phaseId: input.phaseId }), + }); + } + + return event; + } +} + +function isPhaseScopedEvent(type: RunEventTypeName): boolean { + return ( + type.startsWith("phase.") || type.startsWith("artifact.") || type === "review.batch_recorded" + ); +} + +function assertIdempotentReplayMatches( + input: AppendRunEventInput, + type: RunEventTypeName, + payload: Record, + existing: RunEventRow, +) { + const sameType = existing.type === type; + const samePhase = !isPhaseScopedEvent(type) || existing.phaseId === (input.phaseId ?? null); + const samePayload = canonicalize(normalizeJson(existing.payload)) === canonicalize(payload); + if (sameType && samePhase && samePayload) { + return; + } + + throw new DevflowError("Run event idempotency key replay does not match existing event", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + ...(input.phaseId === undefined ? 
{} : { phaseId: input.phaseId }), + }); +} + +function expectedRunEventIdempotencyKey( + input: AppendRunEventInput, + type: RunEventTypeName, + payload: Record, +): string | undefined { + switch (type) { + case "run.created": + case "run.started": + case "run.completed": + case "run.failed": + case "run.aborted": + return `${type}:${input.runId}`; + case "run.paused": + case "run.resumed": + return `${type}:${input.runId}:${stringPayload(payload, "cause")}`; + case "phase.started": + case "phase.completed": + case "phase.failed": + case "phase.skipped": + return `${type}:${requiredPhaseId(input)}:${numberPayload(payload, "attempt")}`; + case "prompt.sent": + case "prompt.repaired": + return `${type}:${stringPayload(payload, "dedupKey")}`; + case "artifact.expected": + case "artifact.timeout": + return `${type}:${requiredPhaseId(input)}:${numberPayload(payload, "attempt")}:${stringPayload(payload, "path")}`; + case "artifact.validated": + case "artifact.invalid": + return `${type}:${requiredPhaseId(input)}:${stringPayload(payload, "path")}:${stringPayload(payload, "hash")}`; + case "approval.requested": + return `approval.requested:${stringPayload(payload, "approvalIdempotencyKey")}`; + case "approval.resolved": + return `approval.resolved:${stringPayload(payload, "approvalRequestId")}:${stringPayload(payload, "action")}`; + case "session.created": + case "session.failed": + return `${type}:${stringPayload(payload, "sessionId")}`; + case "session.busy": + case "session.idle": + return `${type}:${stringPayload(payload, "sessionId")}:${stringPayload(payload, "dedupKey")}`; + case "session.ready": + case "session.crashed": + case "session.recovered": + return `${type}:${stringPayload(payload, "sessionId")}:${numberPayload(payload, "recoveryAttempts")}`; + case "command.started": + case "command.completed": + case "command.failed": + return `${type}:${stringPayload(payload, "commandId")}`; + case "review.batch_recorded": + return 
`review.batch_recorded:${requiredPhaseId(input)}:${stringPayload(payload, "reviewerRole")}:${numberPayload(payload, "attempt")}`; + case "finding.verifier_resolved": + return `finding.verifier_resolved:${stringPayload(payload, "findingId")}`; + case "backtest.iteration_started": + case "backtest.iteration_completed": + case "backtest.objective_evaluated": + return `${type}:${stringPayload(payload, "iterationId")}`; + default: + return undefined; + } +} + +function requiredPhaseId(input: AppendRunEventInput): string { + if (input.phaseId === undefined) { + throw new DevflowError("Run event phase id is required for idempotency key", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + }); + } + + return input.phaseId; +} + +function stringPayload(payload: Record, key: string): string { + const value = payload[key]; + if (typeof value !== "string" || value.length === 0) { + throw new DevflowError(`Run event payload is missing string field ${key}`, { + class: "fatal", + code: "internal_state_corruption", + }); + } + + return value; +} + +function numberPayload(payload: Record, key: string): number { + const value = payload[key]; + if (typeof value !== "number" || !Number.isInteger(value)) { + throw new DevflowError(`Run event payload is missing integer field ${key}`, { + class: "fatal", + code: "internal_state_corruption", + }); + } + + return value; +} + +function normalizeJson(value: unknown): unknown { + if (Array.isArray(value)) { + return value.map((item) => normalizeJson(item)); + } + + if (value !== null && typeof value === "object") { + return Object.fromEntries( + Object.entries(value as Record).map(([key, child]) => [ + key, + normalizeJson(child), + ]), + ); + } + + return value; +} diff --git a/packages/run-engine/package.json b/packages/run-engine/package.json new file mode 100644 index 0000000..22799cd --- /dev/null +++ b/packages/run-engine/package.json @@ -0,0 +1,20 @@ +{ + "name": "@devflow/run-engine", + "version": "0.0.0", 
+ "private": true, + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "scripts": { + "build": "tsup src/index.ts --format esm,cjs --clean && tsc -p tsconfig.build.json", + "typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit", + "test": "cd ../.. && vitest run --project packages/run-engine" + }, + "dependencies": { + "@devflow/core": "workspace:*", + "@devflow/db": "workspace:*", + "@devflow/session": "workspace:*", + "drizzle-orm": "0.45.2" + } +} diff --git a/packages/run-engine/src/fake-phase-harness.test.ts b/packages/run-engine/src/fake-phase-harness.test.ts new file mode 100644 index 0000000..3086c91 --- /dev/null +++ b/packages/run-engine/src/fake-phase-harness.test.ts @@ -0,0 +1,3059 @@ +import { randomUUID } from "node:crypto"; +import { mkdirSync, mkdtempSync, realpathSync, rmSync, symlinkSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { dirname, join } from "node:path"; +import { eq, inArray } from "drizzle-orm"; +import { afterEach, describe, expect, it } from "vitest"; + +import { DevflowError, hash } from "@devflow/core"; +import { type DbClient, createDbClient } from "@devflow/db"; +import { + approvalRequests, + artifacts, + runEvents, + runPhases, + runs, + tuiSessions, + tuiTranscriptChunks, + workflowTemplates, +} from "@devflow/db"; +import { + FakeSessionAdapter, + type ProbeResult, + type SessionHandle, + type StartInput, + type TranscriptChunk, +} from "@devflow/session"; + +import { runSingleFakePhase } from "./fake-phase-harness.js"; +import { RunEventRepository } from "./run-event-repository.js"; + +const databaseUrl = + process.env.DATABASE_URL ?? 
"postgres://devflow:devflow@127.0.0.1:55432/devflow"; + +class RebootstrapFailsOnceFakeAdapter extends FakeSessionAdapter { + rebootstrapAttempts = 0; + + override async rebootstrap(handle: SessionHandle): Promise { + this.rebootstrapAttempts += 1; + if (this.rebootstrapAttempts === 1) { + throw new DevflowError("transient rebootstrap failure", { + class: "recoverable", + code: "pane_briefly_unresponsive", + }); + } + + return super.rebootstrap(handle); + } +} + +class RebootstrapWritesStaleArtifactFakeAdapter extends FakeSessionAdapter { + private expectedArtifactPath: string | undefined; + + override async start(input: StartInput): Promise { + this.expectedArtifactPath = input.expectedArtifactPath; + return super.start(input); + } + + override async rebootstrap(handle: SessionHandle): Promise { + const sessionHandle = await super.rebootstrap(handle); + if (this.expectedArtifactPath !== undefined) { + mkdirSync(dirname(this.expectedArtifactPath), { recursive: true }); + writeFileSync(this.expectedArtifactPath, JSON.stringify({ fake: "invalid" })); + } + + return sessionHandle; + } +} + +class RebootstrapFatalFakeAdapter extends FakeSessionAdapter { + override async rebootstrap(_handle: SessionHandle): Promise { + throw new DevflowError("fatal rebootstrap failure", { + class: "fatal", + code: "internal_state_corruption", + }); + } +} + +class RebootstrapUnknownFailureFakeAdapter extends FakeSessionAdapter { + override async rebootstrap(_handle: SessionHandle): Promise { + throw new Error("unclassified rebootstrap failure"); + } +} + +class RebootstrapHumanRequiredFakeAdapter extends FakeSessionAdapter { + override async rebootstrap(_handle: SessionHandle): Promise { + throw new DevflowError("human rebootstrap failure", { + class: "human_required", + code: "backend_unavailable", + }); + } +} + +class ProbeRecoverableFakeAdapter extends FakeSessionAdapter { + override async probe(_handle: SessionHandle): Promise { + throw new DevflowError("recoverable probe failure", 
{ + class: "recoverable", + code: "pane_briefly_unresponsive", + }); + } +} + +class ProbeUnknownFailureFakeAdapter extends FakeSessionAdapter { + override async probe(_handle: SessionHandle): Promise { + throw new Error("unclassified probe failure"); + } +} + +class BreakArtifactParentFakeAdapter extends FakeSessionAdapter { + override async sendPrompt( + handle: SessionHandle, + envelope: Parameters[1], + ): Promise<{ promptId: string }> { + mkdirSync(dirname(dirname(envelope.expectedArtifact)), { recursive: true }); + writeFileSync(dirname(envelope.expectedArtifact), "not a directory"); + return super.sendPrompt(handle, envelope); + } +} + +class WriteDirectoryArtifactFakeAdapter extends FakeSessionAdapter { + override async sendPrompt( + handle: SessionHandle, + envelope: Parameters[1], + ): Promise<{ promptId: string }> { + mkdirSync(envelope.expectedArtifact, { recursive: true }); + return super.sendPrompt(handle, envelope); + } +} + +class StartFailsFakeAdapter extends FakeSessionAdapter { + constructor(private readonly error: DevflowError) { + super(); + } + + override async start(_input: StartInput): Promise { + throw this.error; + } +} + +class PromptWritesArtifactBeforeReturnFakeAdapter extends FakeSessionAdapter { + override async sendPrompt( + handle: SessionHandle, + envelope: Parameters[1], + ): Promise<{ promptId: string }> { + mkdirSync(dirname(envelope.expectedArtifact), { recursive: true }); + writeFileSync( + envelope.expectedArtifact, + JSON.stringify({ + summary: "Fast artifact", + requirements: [{ id: "REQ-1", description: "Accept pre-return artifact" }], + acceptanceCriteria: ["Accepted"], + risks: [], + }), + ); + return super.sendPrompt(handle, { + ...envelope, + instructions: "Scenario: timeout\nAlready wrote artifact before returning.", + }); + } +} + +class AcceptedThenTransientFakeAdapter extends FakeSessionAdapter { + sendAttempts = 0; + + override async sendPrompt( + handle: SessionHandle, + envelope: Parameters[1], + ): Promise<{ 
promptId: string }> { + this.sendAttempts += 1; + const result = await super.sendPrompt(handle, envelope); + if (this.sendAttempts === 1) { + throw new DevflowError("lost prompt acknowledgement", { + class: "recoverable", + code: "prompt_send_transient", + }); + } + + return result; + } +} + +class CaptureCursorFakeAdapter extends FakeSessionAdapter { + capturedFromSeq: bigint | undefined; + + constructor( + private readonly db: DbClient["db"], + options: ConstructorParameters[0], + ) { + super(options); + } + + override async sendPrompt( + handle: SessionHandle, + envelope: Parameters[1], + ): Promise<{ promptId: string }> { + const result = await super.sendPrompt(handle, envelope); + await this.db.insert(tuiTranscriptChunks).values({ + sessionId: handle.sessionId, + seq: 1n, + content: "[fake] pre-captured transcript chunk", + capturedAt: new Date(), + }); + await this.db + .update(tuiSessions) + .set({ lastCaptureSeq: 1n }) + .where(eq(tuiSessions.id, handle.sessionId)); + return result; + } + + override async *capture(handle: SessionHandle, fromSeq: bigint): AsyncIterable { + this.capturedFromSeq = fromSeq; + yield* super.capture(handle, fromSeq); + } +} + +describe("runSingleFakePhase", () => { + let client: DbClient | undefined; + const templateIds: string[] = []; + const runIds: string[] = []; + const tempRoots: string[] = []; + + afterEach(async () => { + if (client !== undefined) { + if (runIds.length > 0) { + await client.db + .delete(approvalRequests) + .where(inArray(approvalRequests.runId, [...runIds])); + await client.db.delete(runs).where(inArray(runs.id, [...runIds])); + } + if (templateIds.length > 0) { + await client.db + .delete(workflowTemplates) + .where(inArray(workflowTemplates.id, [...templateIds])); + } + await client.close(); + client = undefined; + } + + for (const root of tempRoots.splice(0)) { + rmSync(root, { recursive: true, force: true }); + } + runIds.length = 0; + templateIds.length = 0; + }); + + async function createRunAndPhase( 
+ runState = "executing", + phaseState = "pending", + phaseAttempts = 0, + ) { + client = createDbClient(databaseUrl); + const templateId = randomUUID(); + const runId = randomUUID(); + const phaseId = randomUUID(); + templateIds.push(templateId); + runIds.push(runId); +/* Insert the template first, then the run that references it, then the single phase row. */ + await client.db.insert(workflowTemplates).values({ + id: templateId, + name: `template-${templateId}`, + version: 1, + hash: `hash-${templateId}`, + definition: {}, + }); + await client.db.insert(runs).values({ + id: runId, + templateId, + templateHash: `hash-${templateId}`, + state: runState, + repoPath: `/tmp/devflow-${runId}`, + baseBranch: "main", + worktreeRoot: `/tmp/devflow-${runId}/main`, + }); + await client.db.insert(runPhases).values({ + id: phaseId, + runId, + phaseKey: "implement", + seq: 1, + state: phaseState, + attempts: phaseAttempts, + }); + + return { db: client.db, phaseId, runId }; + } +/* Inserts a "phase.started" run event at seq 1 for the given run, phase and attempt. The payload includes the repair flag only when repair is true; the idempotency key is phase.started:<phaseId>:<attempt>. */ + async function recordPhaseStarted( + db: DbClient["db"], + runId: string, + phaseId: string, + attempt = 1, + repair = false, + ) { + await db.insert(runEvents).values({ + runId, + phaseId, + seq: 1n, + type: "phase.started", + payload: repair + ? { phaseKey: "implement", attempt, repair } + : { phaseKey: "implement", attempt }, + idempotencyKey: `phase.started:${phaseId}:${attempt}`, + }); + } +/* Builds the repair-attempt instructions used when computing expected prompt hashes: reads the scenario named on a "Repair-Scenario:" line (defaulting to "ok") and replaces the "Scenario:" line with that scenario plus a repair sentence. NOTE(review): presumably mirrors the harness's own rewrite of repair instructions - confirm against fake-phase-harness.ts. */ + function repairInstructionsForTest(instructions: string): string { + const repairScenario = + /^Repair-Scenario:\s*([A-Za-z0-9_-]+)\s*$/m.exec(instructions)?.[1] ?? 
"ok"; + return instructions.replace( + /^Scenario:\s*[A-Za-z0-9_-]+\s*$/m, + `Scenario: ${repairScenario}\nRepair the artifact so it conforms to the expected schema.`, + ); + } +/* A run in state "paused" or "awaiting_approval" must be rejected with code internal_state_corruption. */ + it.each(["paused", "awaiting_approval"] as const)( + "refuses to start a fake phase when the run is %s", + async (runState) => { + const { db, phaseId, runId } = await createRunAndPhase(runState); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-non-executing-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 100 }, + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); +/* The rejection must leave no trace: run state and phase row unchanged, no events, no sessions. */ + const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); + expect(run).toEqual({ state: runState }); + + const [phase] = await db + .select({ attempts: runPhases.attempts, state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ attempts: 0, state: "pending" }); + + const events = await db.select().from(runEvents).where(eq(runEvents.runId, runId)); + expect(events).toEqual([]); + + const sessions = await db.select().from(tuiSessions).where(eq(tuiSessions.runId, runId)); + expect(sessions).toEqual([]); + }, + ); +/* A phase already in an active state (attempts = 1) but with no recorded phase.started event must be rejected rather than treated as a resume. */ + it.each(["running", "awaiting_artifact", "validating", "awaiting_approval"] as const)( + "refuses to start an initial fake phase when the phase is already %s", + async (phaseState) => { + const { db, phaseId, runId } = await createRunAndPhase("executing", phaseState, 1); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), 
"devflow-fake-phase-active-phase-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 100 }, + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); +/* The phase row keeps its seeded state and attempt count, and no events or sessions were written. */ + const [phase] = await db + .select({ attempts: runPhases.attempts, state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ attempts: 1, state: phaseState }); + + const events = await db.select().from(runEvents).where(eq(runEvents.runId, runId)); + expect(events).toEqual([]); + + const sessions = await db.select().from(tuiSessions).where(eq(tuiSessions.runId, runId)); + expect(sessions).toEqual([]); + }, + ); +/* Seeds a phase.started event that uses the attempt-1 idempotency key but a different payload (phaseKey "other"), then expects the run to fail with internal_state_corruption. */ + it("rolls back phase start when recording the phase.started event fails", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-start-event-conflict-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + await db.insert(runEvents).values({ + runId, + phaseId, + seq: 1n, + type: "phase.started", + payload: { phaseKey: "other", attempt: 1 }, + idempotencyKey: `phase.started:${phaseId}:1`, + }); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, 
timeoutMs: 100 }, + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); +/* The phase must still be pending with zero attempts, and only the pre-seeded event may exist. */ + const [phase] = await db + .select({ attempts: runPhases.attempts, state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ attempts: 0, state: "pending" }); + + const events = await db + .select({ idempotencyKey: runEvents.idempotencyKey }) + .from(runEvents) + .where(eq(runEvents.runId, runId)); + expect(events).toEqual([{ idempotencyKey: `phase.started:${phaseId}:1` }]); + }); +/* Happy path from a pending phase: checks the returned result, the artifact row, the phase and session rows, the transcript chunks and the exact event sequence. */ + it("runs one fake phase through prompt, artifact validation, transcript persistence, and events", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-fake-phase-"))); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + const result = await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000010", + }); +/* promptId is expected to equal hash() over the attempt-1 prompt fields listed here. */ + expect(result).toMatchObject({ + artifactValid: true, + promptId: hash({ + attempt: 1, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseKey: "implement", + roleId: "implementer", + runId, + }), + sessionId, + }); + + const [artifact] = await db.select().from(artifacts).where(eq(artifacts.id, result.artifactId)); + expect(artifact).toMatchObject({ + hash: result.artifactHash, + path: expectedArtifactPath, + schemaId: "dev/spec@1", + valid: true, + }); + + const [phase] = 
await db + .select({ state: runPhases.state, attempts: runPhases.attempts }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ state: "completed", attempts: 1 }); + await expectRunCompleted(db, runId); +/* The session row must record the expected artifact and schema, the prompt hash, a capture cursor greater than 0 and state READY. */ + const [session] = await db + .select({ + expectedArtifactPath: tuiSessions.expectedArtifactPath, + expectedSchema: tuiSessions.expectedSchema, + lastCaptureSeq: tuiSessions.lastCaptureSeq, + lastPromptHash: tuiSessions.lastPromptHash, + state: tuiSessions.state, + }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session).toMatchObject({ + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastCaptureSeq: expect.any(BigInt), + lastPromptHash: result.promptId, + state: "READY", + }); + expect(session?.lastCaptureSeq).toBeGreaterThan(0n); +/* Exactly three transcript chunks are expected, with seq 1 to 3. */ + const chunks = await db + .select() + .from(tuiTranscriptChunks) + .where(eq(tuiTranscriptChunks.sessionId, sessionId)) + .orderBy(tuiTranscriptChunks.seq); + expect(chunks.map((chunk) => chunk.seq)).toEqual([1n, 2n, 3n]); +/* Ten events with contiguous seq values in this exact order; the last is keyed run.completed:<runId> and every idempotency key is unique. */ + const events = await db + .select({ + seq: runEvents.seq, + type: runEvents.type, + idempotencyKey: runEvents.idempotencyKey, + }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.seq)).toEqual([1n, 2n, 3n, 4n, 5n, 6n, 7n, 8n, 9n, 10n]); + expect(events.map((event) => event.type)).toEqual([ + "phase.started", + "session.created", + "session.ready", + "session.busy", + "prompt.sent", + "artifact.expected", + "artifact.validated", + "session.idle", + "phase.completed", + "run.completed", + ]); + expect(events.at(-1)?.idempotencyKey).toBe(`run.completed:${runId}`); + expect(new Set(events.map((event) => event.idempotencyKey)).size).toBe(events.length); + }); +/* Lost-acknowledgement case: the phase must still complete, the adapter must have seen exactly two send attempts, and no approval request may be created. */ + it("treats an accepted prompt retry with a duplicate dedup key as idempotent success", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = 
realpathSync(mkdtempSync(join(tmpdir(), "devflow-fake-phase-lost-ack-"))); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + const adapter = new AcceptedThenTransientFakeAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }); + + const result = await runSingleFakePhase({ + adapter, + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000035", + }); +/* Two send attempts in total, a completed run and no approval request. */ + expect(result.artifactValid).toBe(true); + expect(adapter.sendAttempts).toBe(2); + await expectRunCompleted(db, runId); + + const approvals = await db + .select() + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approvals).toEqual([]); + }); +/* Crash-recovery case: the phase is "running" with phase.started recorded, the adapter has already accepted the attempt-1 prompt, and the session row is BUSY with that prompt hash, but no prompt.sent event exists yet. */ + it("resumes a running phase when prompt delivery succeeded before prompt.sent was recorded", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "running", 1); + await recordPhaseStarted(db, runId, phaseId); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-prompt-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const instructions = "Scenario: ok\nWrite the development specification."; + const sessionId = randomUUID(); + const adapter = new FakeSessionAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }); + const handle = await adapter.start({ + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + }); + const dedupKey = hash({ + attempt: 1, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, 
+ phaseKey: "implement", + roleId: "implementer", + runId, + }); + await adapter.sendPrompt(handle, { + uuid: "00000000-0000-4000-8000-000000000037", + runId, + roleId: "implementer", + phaseKey: "implement", + attempt: 1, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + dedupKey, + instructions, + }); + await db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastPromptHash: dedupKey, + lastPromptAt: new Date(), + state: "BUSY", + }); +/* Resume with the same adapter instance that already accepted the prompt. */ + const result = await runSingleFakePhase({ + adapter, + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000038", + }); + + expect(result.artifactValid).toBe(true); + await expectRunCompleted(db, runId); +/* prompt.sent must now be recorded, and phase.started must not be duplicated. */ + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toContain("prompt.sent"); + expect(events.map((event) => event.type).filter((type) => type === "phase.started")).toEqual([ + "phase.started", + ]); + }); +/* Crash-recovery case: the phase is "running" with phase.started recorded but no session row exists. */ + it("resumes a running phase when the crash happened before session creation", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "running", 1); + await recordPhaseStarted(db, runId, phaseId); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-pre-session-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + + const result = await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + 
instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + expect(result.artifactValid).toBe(true); + await expectRunCompleted(db, runId); +/* Still exactly one phase.started event, and a session.created event was recorded during the resume. */ + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.filter((event) => event.type === "phase.started")).toHaveLength(1); + expect(events.map((event) => event.type)).toContain("session.created"); + }); +/* Crash-recovery case: the phase is "awaiting_artifact", the session row is BUSY with the attempt-1 prompt hash, and a schema-conforming artifact file is already on disk. */ + it("resumes an awaiting_artifact phase without sending the prompt again", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "awaiting_artifact", 1); + await recordPhaseStarted(db, runId, phaseId); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-awaiting-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const instructions = "Scenario: ok\nWrite the development specification."; + const promptId = hash({ + attempt: 1, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseKey: "implement", + roleId: "implementer", + runId, + }); + const sessionId = randomUUID(); + await db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastPromptHash: promptId, + lastPromptAt: new Date(), + state: "BUSY", + }); + mkdirSync(dirname(expectedArtifactPath), { recursive: true }); + writeFileSync( + expectedArtifactPath, + JSON.stringify({ + summary: "Replay artifact", + requirements: [{ id: "REQ-1", description: "Resume awaiting artifact" }], + acceptanceCriteria: ["Accepted"], + risks: [], + }), + ); + + const result = await runSingleFakePhase({ + adapter: new 
FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + expect(result).toMatchObject({ artifactValid: true, promptId, sessionId }); + await expectRunCompleted(db, runId); +/* No prompt.sent may appear on resume, while the artifact.expected and artifact.validated events must. */ + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).not.toContain("prompt.sent"); + expect(events.map((event) => event.type)).toContain("artifact.expected"); + expect(events.map((event) => event.type)).toContain("artifact.validated"); + }); +/* Crash-recovery case: the phase is "validating", the session row is READY, and a valid artifact row for the expected path already exists; the result must reuse that row's id and hash. */ + it("resumes a validating phase with an already validated artifact", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "validating", 1); + await recordPhaseStarted(db, runId, phaseId); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-validating-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const instructions = "Scenario: ok\nWrite the development specification."; + const sessionId = randomUUID(); + const promptId = hash({ + attempt: 1, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseKey: "implement", + roleId: "implementer", + runId, + }); + const artifactHash = "c".repeat(64); + await db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastPromptHash: promptId, + lastPromptAt: new Date("2026-01-01T00:00:00.000Z"), + state: "READY", + }); + const artifactId = randomUUID(); + await db.insert(artifacts).values({ + id: artifactId, + runId, + phaseId, + path: expectedArtifactPath, + 
schemaId: "dev/spec@1", + hash: artifactHash, + valid: true, + }); + + const result = await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); +/* The result must point at the pre-existing artifact row and session. */ + expect(result).toMatchObject({ + artifactHash, + artifactId, + artifactValid: true, + promptId, + sessionId, + }); + await expectRunCompleted(db, runId); +/* Exact event order for this replay; no session.created, prompt.sent or artifact.expected events are expected. */ + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual([ + "phase.started", + "artifact.validated", + "session.idle", + "phase.completed", + "session.ready", + "run.completed", + ]); + }); +/* The only artifact row for this phase points at other-spec.json rather than the expected path; the replay must be rejected with internal_state_corruption and the run must stay "executing". */ + it("does not replay a validating artifact from a different expected path", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "validating", 1); + await recordPhaseStarted(db, runId, phaseId); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-wrong-path-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + await db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastPromptHash: "d".repeat(64), + lastPromptAt: new Date(), + state: "READY", + }); + await db.insert(artifacts).values({ + id: randomUUID(), + runId, + phaseId, + path: join(worktreeRoot, "artifacts", "other-spec.json"), + schemaId: "dev/spec@1", + hash: "e".repeat(64), + valid: true, + }); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + 
expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 100 }, + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); +/* The run state must not have been changed by the rejected replay. */ + const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); + expect(run?.state).toBe("executing"); + }); +/* Crash-recovery case: the phase is "validating" on attempt 1 with an invalid artifact row already stored. The instructions use scenario "invalid" with Repair-Scenario "ok", and the run is expected to complete via a repair attempt. */ + it("replays an invalid validating artifact and uses the one repair attempt", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "validating", 1); + await recordPhaseStarted(db, runId, phaseId); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-invalid-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + const instructions = + "Scenario: invalid\nRepair-Scenario: ok\nWrite the development specification."; + const promptId = hash({ + attempt: 1, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseKey: "implement", + roleId: "implementer", + runId, + }); + const adapter = new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }); + await adapter.start({ + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + }); + await db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastPromptHash: promptId, + lastPromptAt: new Date("2026-01-01T00:00:00.000Z"), + state: "READY", + }); + const invalidArtifactId = randomUUID(); + await db.insert(artifacts).values({ + id: invalidArtifactId, + runId, + phaseId, + path: expectedArtifactPath, + schemaId: "dev/spec@1", + hash: "a".repeat(64), 
+ valid: false, + validationError: { errors: [{ keyword: "required" }] }, + }); + + const result = await runSingleFakePhase({ + adapter, + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + expect(result.artifactValid).toBe(true); + await expectRunCompleted(db, runId); +/* Exact event order: the pre-seeded phase.started, the replayed artifact.invalid, then a second phase.started followed by the repair prompt and successful validation. */ + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual([ + "phase.started", + "artifact.invalid", + "session.idle", + "phase.started", + "session.busy", + "prompt.repaired", + "artifact.expected", + "artifact.validated", + "session.idle", + "phase.completed", + "session.ready", + "run.completed", + ]); + }); +/* Crash-recovery case: attempt 2 (repair) has its phase.started recorded and the phase is "running", but the session row still carries the attempt-1 prompt hash, so the repair prompt has not been recorded yet. */ + it("resumes a repair attempt after phase.started but before prompt.repaired", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "running", 2); + await recordPhaseStarted(db, runId, phaseId, 2, true); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-repair-running-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const instructions = + "Scenario: invalid\nRepair-Scenario: ok\nWrite the development specification."; + const priorPromptId = hash({ + attempt: 1, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseKey: "implement", + roleId: "implementer", + runId, + }); + const sessionId = randomUUID(); + const adapter = new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }); + await adapter.start({ + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + }); + await 
db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastPromptHash: priorPromptId, + lastPromptAt: new Date(), + state: "READY", + }); + + const result = await runSingleFakePhase({ + adapter, + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + expect(result.artifactValid).toBe(true); + await expectRunCompleted(db, runId); +/* The attempt counter must stay at 2 (no third attempt) and the phase must be completed. */ + const [phase] = await db + .select({ attempts: runPhases.attempts, state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ attempts: 2, state: "completed" }); +/* prompt.repaired must be recorded and phase.started must not be duplicated. */ + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toContain("prompt.repaired"); + expect(events.filter((event) => event.type === "phase.started")).toHaveLength(1); + }); +/* Crash-recovery case: repair attempt 2 is "awaiting_artifact"; the session row is BUSY with the hash of the attempt-2 repair prompt, and a schema-conforming artifact file is already on disk. */ + it("resumes a repair attempt while awaiting its artifact", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "awaiting_artifact", 2); + await recordPhaseStarted(db, runId, phaseId, 2, true); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-repair-awaiting-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const instructions = + "Scenario: invalid\nRepair-Scenario: ok\nWrite the development specification."; + const repairInstructions = repairInstructionsForTest(instructions); + const promptId = hash({ + attempt: 2, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: repairInstructions, + phaseKey: "implement", + roleId: "implementer", + 
runId, + }); + const sessionId = randomUUID(); + await db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastPromptHash: promptId, + lastPromptAt: new Date(), + state: "BUSY", + }); + mkdirSync(dirname(expectedArtifactPath), { recursive: true }); + writeFileSync( + expectedArtifactPath, + JSON.stringify({ + summary: "Repair replay artifact", + requirements: [{ id: "REQ-1", description: "Resume repair artifact wait" }], + acceptanceCriteria: ["Accepted"], + risks: [], + }), + ); +/* The caller passes the original instructions; the expected promptId above was hashed from the repair instructions for attempt 2. */ + const result = await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + expect(result).toMatchObject({ artifactValid: true, promptId, sessionId }); + await expectRunCompleted(db, runId); + + const [phase] = await db + .select({ attempts: runPhases.attempts, state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ attempts: 2, state: "completed" }); + }); +/* Crash-recovery case: repair attempt 2 is "validating" with an invalid artifact row and an invalid file on disk. The run must fail with artifact_invalid_after_repair, mark the phase failed, pause the run and open a pending approval gate. */ + it("gates instead of starting a third attempt when a replayed repair artifact is invalid", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "validating", 2); + await recordPhaseStarted(db, runId, phaseId, 2, true); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-repair-invalid-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const instructions = + "Scenario: invalid\nRepair-Scenario: invalid\nWrite the development specification."; + const promptId = hash({ + attempt: 2, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + 
instructions: repairInstructionsForTest(instructions), + phaseKey: "implement", + roleId: "implementer", + runId, + }); + const sessionId = randomUUID(); + await db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastPromptHash: promptId, + lastPromptAt: new Date(), + state: "READY", + }); + await db.insert(artifacts).values({ + id: randomUUID(), + runId, + phaseId, + path: expectedArtifactPath, + schemaId: "dev/spec@1", + hash: "1".repeat(64), + valid: false, + validationError: { errors: [{ keyword: "required" }] }, + }); + mkdirSync(dirname(expectedArtifactPath), { recursive: true }); + writeFileSync(expectedArtifactPath, JSON.stringify({ fake: "invalid" })); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }), + ).rejects.toMatchObject({ code: "artifact_invalid_after_repair" }); +/* The attempt counter stays at 2, the phase is failed, the run is paused and a pending approval request with the matching gate key exists. */ + const [phase] = await db + .select({ attempts: runPhases.attempts, state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ attempts: 2, state: "failed" }); + await expectRunPaused(db, runId); + + const [approval] = await db + .select({ gateKey: approvalRequests.gateKey, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "artifact_invalid_after_repair", state: "pending" }); + }); +/* Crash-recovery case: the phase is "validating" but no artifact row was written yet; a schema-conforming file is on disk and must be validated into exactly one valid artifact row. */ + it("revalidates an artifact file when validating replay has no artifact row yet", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "validating", 1); + await recordPhaseStarted(db, runId, phaseId); + const worktreeRoot = realpathSync( 
+ mkdtempSync(join(tmpdir(), "devflow-fake-phase-validating-file-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const instructions = "Scenario: ok\nWrite the development specification."; + const promptId = hash({ + attempt: 1, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseKey: "implement", + roleId: "implementer", + runId, + }); + const sessionId = randomUUID(); + await db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastPromptHash: promptId, + lastPromptAt: new Date(), + state: "BUSY", + }); + mkdirSync(dirname(expectedArtifactPath), { recursive: true }); + writeFileSync( + expectedArtifactPath, + JSON.stringify({ + summary: "Validation replay artifact", + requirements: [{ id: "REQ-1", description: "Resume validation" }], + acceptanceCriteria: ["Accepted"], + risks: [], + }), + ); + + const result = await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + expect(result).toMatchObject({ artifactValid: true, promptId, sessionId }); + await expectRunCompleted(db, runId); +/* Exactly one artifact row must exist for the run, and it must be valid. */ + const artifactRows = await db.select().from(artifacts).where(eq(artifacts.runId, runId)); + expect(artifactRows).toHaveLength(1); + expect(artifactRows[0]?.valid).toBe(true); + }); +/* Crash-recovery case: repair attempt 2 is "validating"; the only artifact row is an invalid one created one minute before the repair prompt timestamp, while a fresh schema-conforming file is on disk. */ + it("ignores a stale prior-attempt artifact row during repair validation replay", async () => { + const { db, phaseId, runId } = await createRunAndPhase("executing", "validating", 2); + await recordPhaseStarted(db, runId, phaseId, 2, true); + const worktreeRoot = realpathSync( + 
mkdtempSync(join(tmpdir(), "devflow-fake-phase-stale-artifact-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const staleCreatedAt = new Date("2026-01-01T00:00:00.000Z"); + const repairPromptAt = new Date("2026-01-01T00:01:00.000Z"); + await db.insert(artifacts).values({ + id: randomUUID(), + runId, + phaseId, + path: expectedArtifactPath, + schemaId: "dev/spec@1", + hash: "2".repeat(64), + valid: false, + validationError: { errors: [{ keyword: "required" }] }, + createdAt: staleCreatedAt, + }); + const instructions = + "Scenario: invalid\nRepair-Scenario: ok\nWrite the development specification."; + const promptId = hash({ + attempt: 2, + expectedArtifact: expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: repairInstructionsForTest(instructions), + phaseKey: "implement", + roleId: "implementer", + runId, + }); + const sessionId = randomUUID(); + await db.insert(tuiSessions).values({ + id: sessionId, + runId, + roleId: "implementer", + backend: "fake", + cwd: worktreeRoot, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + lastPromptHash: promptId, + lastPromptAt: repairPromptAt, + state: "BUSY", + }); + mkdirSync(dirname(expectedArtifactPath), { recursive: true }); + writeFileSync( + expectedArtifactPath, + JSON.stringify({ + summary: "Fresh repair artifact", + requirements: [{ id: "REQ-1", description: "Ignore stale artifact row" }], + acceptanceCriteria: ["Accepted"], + risks: [], + }), + ); +/* The stale row was created at staleCreatedAt, earlier than the session's lastPromptAt (repairPromptAt); the run is expected to complete from the file on disk instead. */ + const result = await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions, + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + expect(result).toMatchObject({ artifactValid: true, promptId, sessionId }); + await expectRunCompleted(db, runId); + + const artifactRows = 
await db + .select({ valid: artifacts.valid }) + .from(artifacts) + .where(eq(artifacts.runId, runId)); + expect(artifactRows.map((artifact) => artifact.valid).sort()).toEqual([false, true]); + }); + + /* Transcript capture is checked against the session cursor: the adapter must record that capture was requested from seq 1n, the result reports 2 captured chunks, and afterwards the session row holds lastCaptureSeq 3n with chunk seqs 1n, 2n, 3n persisted. NOTE(review): CaptureCursorFakeAdapter is defined outside this view and receives db, so it presumably seeds the initial cursor and chunk itself - confirm. */ it("captures transcript from the persisted session cursor", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-fake-phase-cursor-"))); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + const adapter = new CaptureCursorFakeAdapter(db, { + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }); + + const result = await runSingleFakePhase({ + adapter, + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000036", + }); + + expect(adapter.capturedFromSeq).toBe(1n); + expect(result.transcriptCaptured).toBe(2); + + const [session] = await db + .select({ lastCaptureSeq: tuiSessions.lastCaptureSeq }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session?.lastCaptureSeq).toBe(3n); + + const chunks = await db + .select({ seq: tuiTranscriptChunks.seq }) + .from(tuiTranscriptChunks) + .where(eq(tuiTranscriptChunks.sessionId, sessionId)) + .orderBy(tuiTranscriptChunks.seq); + expect(chunks.map((chunk) => chunk.seq)).toEqual([1n, 2n, 3n]); + }); + + /* Session start rejects with a DevflowError of class human_required and code backend_unavailable. Expected: the error propagates to the caller, the phase row is failed with attempts 1, expectRunPaused passes, the first approval row is pending with gateKey backend_unavailable, no tui_sessions row exists for the run, and the event log is exactly phase.started, phase.failed, run.paused, approval.requested. */ it("maps human-required session start failures to a gate", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-start-human-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + 
+ await expect( + runSingleFakePhase({ + adapter: new StartFailsFakeAdapter( + new DevflowError("backend unavailable", { + class: "human_required", + code: "backend_unavailable", + runId, + phaseId, + }), + ), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }), + ).rejects.toMatchObject({ code: "backend_unavailable" }); + + const [phase] = await db + .select({ attempts: runPhases.attempts, state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ attempts: 1, state: "failed" }); + await expectRunPaused(db, runId); + + const [approval] = await db + .select({ gateKey: approvalRequests.gateKey, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "backend_unavailable", state: "pending" }); + + const sessions = await db.select().from(tuiSessions).where(eq(tuiSessions.runId, runId)); + expect(sessions).toEqual([]); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual([ + "phase.started", + "phase.failed", + "run.paused", + "approval.requested", + ]); + }); + + /* Session start rejects with a DevflowError of class fatal and code internal_state_corruption. Expected: the error propagates, the phase row is failed with attempts 1, the run row is failed, no approval rows exist, and the event log is exactly phase.started, phase.failed, run.failed. */ it("fails the run on fatal session start failures", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-start-fatal-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + + await expect( + runSingleFakePhase({ + adapter: new StartFailsFakeAdapter( + new DevflowError("fatal session start", { + class: "fatal", + code: 
"internal_state_corruption", + runId, + phaseId, + }), + ), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); + + const [phase] = await db + .select({ attempts: runPhases.attempts, state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ attempts: 1, state: "failed" }); + + const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); + expect(run).toEqual({ state: "failed" }); + + const approvals = await db + .select() + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approvals).toEqual([]); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual([ + "phase.started", + "phase.failed", + "run.failed", + ]); + }); + + /* Supplies both worktreeRoot and expectedArtifactPath through a directory symlink (repo-link pointing at repo). The prompt id hash, the session row cwd and expectedArtifactPath, and the artifact row path must all be based on the realpath-resolved repo directory rather than the symlinked spelling. */ it("canonicalizes worktree and artifact paths before hashing and persistence", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const tempRoot = mkdtempSync(join(tmpdir(), "devflow-fake-phase-paths-")); + tempRoots.push(tempRoot); + const worktreeRoot = join(tempRoot, "repo"); + const symlinkRoot = join(tempRoot, "repo-link"); + mkdirSync(worktreeRoot); + symlinkSync(worktreeRoot, symlinkRoot, "dir"); + const expectedArtifactPath = join(symlinkRoot, "artifacts", "spec.json"); + const canonicalWorktreeRoot = realpathSync(worktreeRoot); + const canonicalArtifactPath = join(canonicalWorktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + const result = await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 
}), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot: symlinkRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000019", + }); + + /* The expected prompt id is recomputed here with attempt 1 and the canonical artifact path, so a match shows the id was not derived from the symlinked path. */ expect(result.promptId).toBe( + hash({ + attempt: 1, + expectedArtifact: canonicalArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseKey: "implement", + roleId: "implementer", + runId, + }), + ); + + const [session] = await db + .select({ + cwd: tuiSessions.cwd, + expectedArtifactPath: tuiSessions.expectedArtifactPath, + lastPromptHash: tuiSessions.lastPromptHash, + }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session).toEqual({ + cwd: canonicalWorktreeRoot, + expectedArtifactPath: canonicalArtifactPath, + lastPromptHash: result.promptId, + }); + + const [artifact] = await db + .select({ path: artifacts.path }) + .from(artifacts) + .where(eq(artifacts.id, result.artifactId)); + expect(artifact?.path).toBe(canonicalArtifactPath); + }); + + /* The artifacts directory inside the worktree is a symlink to a sibling directory (artifact-target). The artifact row path must be the resolved location, that is realpath of artifact-target joined with spec.json. */ it("canonicalizes symlink ancestors inside the worktree", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const tempRoot = mkdtempSync(join(tmpdir(), "devflow-fake-phase-path-ancestor-")); + tempRoots.push(tempRoot); + const worktreeRoot = join(tempRoot, "repo"); + const artifactTargetRoot = join(tempRoot, "artifact-target"); + mkdirSync(worktreeRoot); + mkdirSync(artifactTargetRoot); + symlinkSync(artifactTargetRoot, join(worktreeRoot, "artifacts"), "dir"); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const canonicalArtifactPath = join(realpathSync(artifactTargetRoot), "spec.json"); + const sessionId = randomUUID(); + + const result = await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ 
sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000022", + }); + + const [artifact] = await db + .select({ path: artifacts.path }) + .from(artifacts) + .where(eq(artifacts.id, result.artifactId)); + expect(artifact?.path).toBe(canonicalArtifactPath); + }); + + /* A stale JSON file already exists at the expected artifact path before the run starts. With instructions Scenario: timeout then Repair-Scenario: ok, the event log must contain artifact.timeout and exactly one artifact.validated, so the pre-existing file must not have been accepted during the first wait. */ it("removes stale pre-existing artifacts before the first wait", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-fake-phase-stale-"))); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + mkdirSync(dirname(expectedArtifactPath), { recursive: true }); + writeFileSync(expectedArtifactPath, JSON.stringify({ fake: "stale" })); + + await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nRepair-Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000023", + }); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toContain("artifact.timeout"); + expect(events.filter((event) => event.type === "artifact.validated")).toHaveLength(1); + }); + + /* Runs the same phase twice for the same run. The first call is awaited to completion; the second must reject with code internal_state_corruption, and phase.started and run.completed must each appear exactly once in the event log. */ it("refuses to restart a completed phase", async () => { + const { db, 
phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-fake-phase-terminal-"))); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + + await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }); + + /* Second invocation for the same phaseId and runId, with slightly different instructions text. */ await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite the development specification again.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)); + expect(events.filter((event) => event.type === "phase.started")).toHaveLength(1); + expect(events.filter((event) => event.type === "run.completed")).toHaveLength(1); + }); + + /* Scenario: invalid with no Repair-Scenario line. Expected: one artifact row is invalid with a validationError containing an error whose keyword is required, a second row is valid and is the one returned as result.artifactId, the phase completes with attempts 2, and the event log has contiguous seqs 1n to 16n in the exact order asserted further down (the first attempt ends at artifact.invalid and session.idle, then a second phase.started leads to prompt.repaired, artifact.validated, phase.completed and run.completed). */ it("records invalid artifacts and repairs once before completing the phase", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-fake-phase-invalid-"))); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + const result = await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: 
invalid\nWrite an invalid development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000011", + }); + + expect(result.artifactValid).toBe(true); + + const artifactRows = await db + .select({ + id: artifacts.id, + valid: artifacts.valid, + validationError: artifacts.validationError, + }) + .from(artifacts) + .where(eq(artifacts.runId, runId)); + const invalidArtifact = artifactRows.find((artifact) => !artifact.valid); + const validArtifact = artifactRows.find((artifact) => artifact.valid); + expect(invalidArtifact?.validationError).toEqual({ + errors: expect.arrayContaining([expect.objectContaining({ keyword: "required" })]), + }); + expect(validArtifact?.id).toBe(result.artifactId); + + const [phase] = await db + .select({ state: runPhases.state, attempts: runPhases.attempts }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ state: "completed", attempts: 2 }); + await expectRunCompleted(db, runId); + + const events = await db + .select({ + idempotencyKey: runEvents.idempotencyKey, + seq: runEvents.seq, + type: runEvents.type, + }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.seq)).toEqual([ + 1n, + 2n, + 3n, + 4n, + 5n, + 6n, + 7n, + 8n, + 9n, + 10n, + 11n, + 12n, + 13n, + 14n, + 15n, + 16n, + ]); + expect(events.map((event) => event.type)).toEqual([ + "phase.started", + "session.created", + "session.ready", + "session.busy", + "prompt.sent", + "artifact.expected", + "artifact.invalid", + "session.idle", + "phase.started", + "session.busy", + "prompt.repaired", + "artifact.expected", + "artifact.validated", + "session.idle", + "phase.completed", + "run.completed", + ]); + expect(events.find((event) => event.type === "session.ready")?.idempotencyKey).toBe( + `session.ready:${sessionId}:0`, + ); 
+ expect(events.at(-1)?.idempotencyKey).toBe(`run.completed:${runId}`); + }); + + /* Scenario: timeout with no Repair-Scenario line. Expected: the result is valid, the phase completes with attempts 2, the session row ends in state READY with lastCaptureSeq above 0n, and the event log has contiguous seqs 1n to 17n. In the asserted order, artifact.timeout is followed by session.recovered before the second phase.started, and a second session.ready (idempotency key suffix :1) is emitted just before run.completed. */ it("records artifact timeout and recovers with one repair prompt", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-fake-phase-timeout-"))); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + const result = await runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nNever write the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000012", + }); + expect(result.artifactValid).toBe(true); + + const [phase] = await db + .select({ state: runPhases.state, attempts: runPhases.attempts }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ state: "completed", attempts: 2 }); + await expectRunCompleted(db, runId); + + const [session] = await db + .select({ + lastCaptureSeq: tuiSessions.lastCaptureSeq, + state: tuiSessions.state, + }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session?.state).toBe("READY"); + expect(session?.lastCaptureSeq).toBeGreaterThan(0n); + + const events = await db + .select({ + idempotencyKey: runEvents.idempotencyKey, + seq: runEvents.seq, + type: runEvents.type, + }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.seq)).toEqual([ + 1n, + 2n, + 3n, + 4n, + 5n, + 6n, + 7n, + 8n, + 9n, + 10n, + 11n, + 12n, + 13n, + 14n, + 15n, + 16n, + 17n, + ]); + expect(events.map((event) => event.type)).toEqual([ + "phase.started", + 
"session.created", + "session.ready", + "session.busy", + "prompt.sent", + "artifact.expected", + "artifact.timeout", + "session.recovered", + "phase.started", + "session.busy", + "prompt.repaired", + "artifact.expected", + "artifact.validated", + "session.idle", + "phase.completed", + "session.ready", + "run.completed", + ]); + expect(events.at(-2)?.idempotencyKey).toBe(`session.ready:${sessionId}:1`); + expect(events.at(-1)?.idempotencyKey).toBe(`run.completed:${runId}`); + }); + + /* Timeout scenario driven through RebootstrapFailsOnceFakeAdapter. Expected: the run still completes with a valid artifact and the adapter counted exactly 2 rebootstrap attempts (presumably one failure plus one retry, going by the adapter name - its definition is outside this view). */ it("retries rebootstrap once during timeout recovery", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-rebootstrap-retry-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + const adapter = new RebootstrapFailsOnceFakeAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }); + + const result = await runSingleFakePhase({ + adapter, + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nRepair-Scenario: ok\nRetry rebootstrap once.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000024", + }); + + expect(result.artifactValid).toBe(true); + expect(adapter.rebootstrapAttempts).toBe(2); + await expectRunCompleted(db, runId); + }); + + /* Timeout scenario driven through RebootstrapFatalFakeAdapter. Expected: rejection with code internal_state_corruption, run row failed, and no approval rows, meaning no human gate is opened. */ it("fails the run instead of gating when timeout recovery rebootstrap is fatal", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-rebootstrap-fatal-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + 
runSingleFakePhase({ + adapter: new RebootstrapFatalFakeAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nRepair-Scenario: ok\nFatal rebootstrap.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000025", + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); + + const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); + expect(run).toEqual({ state: "failed" }); + + const approvals = await db + .select() + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approvals).toEqual([]); + }); + + /* Timeout scenario driven through RebootstrapUnknownFailureFakeAdapter. Expected: the caller sees code internal_state_corruption, the run row is failed, and no approval rows exist. */ it("fails the run when timeout recovery rebootstrap throws an unclassified error", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-rebootstrap-unknown-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new RebootstrapUnknownFailureFakeAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nRepair-Scenario: ok\nUnknown rebootstrap failure.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000029", + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); + + const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); + expect(run).toEqual({ state: "failed" }); + + const 
approvals = await db + .select() + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approvals).toEqual([]); + }); + + /* Timeout scenario driven through RebootstrapHumanRequiredFakeAdapter. Expected: the rejection carries code backend_unavailable rather than a timeout code, expectRunPaused passes, and the first approval row is pending with gateKey backend_unavailable. */ it("preserves human-required timeout recovery codes", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-rebootstrap-human-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new RebootstrapHumanRequiredFakeAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nRepair-Scenario: ok\nHuman-required rebootstrap failure.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000030", + }), + ).rejects.toMatchObject({ code: "backend_unavailable" }); + + await expectRunPaused(db, runId); + + const [approval] = await db + .select({ gateKey: approvalRequests.gateKey, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "backend_unavailable", state: "pending" }); + }); + + /* Timeout scenario driven through ProbeRecoverableFakeAdapter. Expected: rejection with code artifact_timeout_exhausted, phase failed with attempts 1, expectRunPaused passes, session state FAILED_NEEDS_HUMAN, and a pending approval with gateKey artifact_timeout_exhausted. */ it("requests a human gate when timeout recovery probe fails recoverably", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-probe-recoverable-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new ProbeRecoverableFakeAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }), + db, + 
expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nRepair-Scenario: ok\nProbe fails recoverably.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000027", + }), + ).rejects.toMatchObject({ code: "artifact_timeout_exhausted" }); + + const [phase] = await db + .select({ attempts: runPhases.attempts, state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ attempts: 1, state: "failed" }); + + await expectRunPaused(db, runId); + + const [session] = await db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session).toEqual({ state: "FAILED_NEEDS_HUMAN" }); + + const [approval] = await db + .select({ gateKey: approvalRequests.gateKey, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "artifact_timeout_exhausted", state: "pending" }); + }); + + /* Timeout scenario driven through ProbeUnknownFailureFakeAdapter. Expected: rejection with code internal_state_corruption, run row failed, and no approval rows. */ it("fails the run when timeout recovery probe throws an unclassified error", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-probe-unknown-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new ProbeUnknownFailureFakeAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nRepair-Scenario: ok\nProbe fails unexpectedly.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + 
uuidFactory: () => "00000000-0000-4000-8000-000000000032", + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); + + const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); + expect(run).toEqual({ state: "failed" }); + + const approvals = await db + .select() + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approvals).toEqual([]); + }); + + /* Timeout scenario driven through RebootstrapWritesStaleArtifactFakeAdapter. Expected: the phase completes with attempts 2 and the only artifact row for the run is a valid one, so nothing written during recovery was recorded as an invalid artifact. */ it("does not let a stale artifact produced during timeout recovery satisfy repair validation", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-late-write-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + const result = await runSingleFakePhase({ + adapter: new RebootstrapWritesStaleArtifactFakeAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: + "Scenario: timeout\nRepair-Scenario: ok\nRecovery writes stale invalid artifact.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000020", + }); + + expect(result.artifactValid).toBe(true); + + const [phase] = await db + .select({ state: runPhases.state, attempts: runPhases.attempts }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ state: "completed", attempts: 2 }); + await expectRunCompleted(db, runId); + + const artifactRows = await db + .select({ valid: artifacts.valid }) + .from(artifacts) + .where(eq(artifacts.runId, runId)); + expect(artifactRows).toEqual([{ valid: true }]); + }); + + /* Scenario: ok driven through PromptWritesArtifactBeforeReturnFakeAdapter. Expected: the run completes and exactly one artifact row exists, valid and with a null validationError. */ it("accepts artifacts written before sendPrompt resolves when absent before prompt send", async () => { + const { db, 
phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-fast-write-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + const result = await runSingleFakePhase({ + adapter: new PromptWritesArtifactBeforeReturnFakeAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: ok\nWrite artifact before returning.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 100 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000026", + }); + + expect(result.artifactValid).toBe(true); + await expectRunCompleted(db, runId); + + const artifactRows = await db + .select({ valid: artifacts.valid, validationError: artifacts.validationError }) + .from(artifacts) + .where(eq(artifacts.runId, runId)); + expect(artifactRows).toEqual([{ valid: true, validationError: null }]); + }); + + /* Timeout scenario driven through BreakArtifactParentFakeAdapter. Expected: rejection with code workspace_permissions, phase failed with attempts 1, run row failed, no approval rows, and no artifact.timeout event, so the path error is not reported as an ordinary timeout. */ it("fails the run when artifact wait hits a workspace path error", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-path-error-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new BreakArtifactParentFakeAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nBreak artifact parent before wait.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => 
"00000000-0000-4000-8000-000000000028", + }), + ).rejects.toMatchObject({ code: "workspace_permissions" }); + + const [phase] = await db + .select({ attempts: runPhases.attempts, state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ attempts: 1, state: "failed" }); + + const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); + expect(run).toEqual({ state: "failed" }); + + const approvals = await db + .select() + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approvals).toEqual([]); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).not.toContain("artifact.timeout"); + }); + + /* Timeout scenario driven through WriteDirectoryArtifactFakeAdapter. Expected: rejection with code workspace_permissions, run row failed, and no approval rows. */ it("fails the run when artifact read hits a workspace path error", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-read-error-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new WriteDirectoryArtifactFakeAdapter({ + sessionIdFactory: () => sessionId, + writeDelayMs: 0, + }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nWrite a directory at the artifact path.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000031", + }), + ).rejects.toMatchObject({ code: "workspace_permissions" }); + + const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); + expect(run).toEqual({ state: "failed" }); + + const approvals = await db + .select() + 
.from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approvals).toEqual([]); + }); + + /* Both the initial scenario and the repair scenario are invalid. Expected: rejection with code artifact_invalid_after_repair, phase failed with attempts 2, a pending approval with gateKey artifact_invalid_after_repair, expectRunPaused passes, approval.requested and run.paused events are present, and the run.paused event has an idempotency key and payload cause that embed human_required, the gate key, the phase id and the number 2. */ it("requests a human gate when the one repair attempt still produces an invalid artifact", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-repair-failed-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: + "Scenario: invalid\nRepair-Scenario: invalid\nKeep producing an invalid development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000014", + }), + ).rejects.toMatchObject({ code: "artifact_invalid_after_repair" }); + + const [phase] = await db + .select({ state: runPhases.state, attempts: runPhases.attempts }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ state: "failed", attempts: 2 }); + + const [approval] = await db + .select({ + gateKey: approvalRequests.gateKey, + state: approvalRequests.state, + }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "artifact_invalid_after_repair", state: "pending" }); + + await expectRunPaused(db, runId); + + const events = await db + .select({ + idempotencyKey: runEvents.idempotencyKey, + payload: runEvents.payload, + type: runEvents.type, + }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toContain("approval.requested"); + expect(events.map((event) => 
event.type)).toContain("run.paused"); + expect(events.find((event) => event.type === "run.paused")?.idempotencyKey).toBe( + `run.paused:${runId}:human_required:artifact_invalid_after_repair:${phaseId}:2`, + ); + expect(events.find((event) => event.type === "run.paused")?.payload).toMatchObject({ + cause: `human_required:artifact_invalid_after_repair:${phaseId}:2`, + }); + }); + + /* Pre-inserts a pending approval row whose idempotency key is built from run id, artifact_invalid_after_repair, phase id and 2 (presumably the key the harness itself derives for this gate - confirm in the harness) but whose payload names a different artifact. Expected: the run rejects with internal_state_corruption, the pre-inserted row stays unchanged and is the only approval, run and phase are failed (attempts 2), the session is FAILED_NEEDS_HUMAN, and the events include phase.failed, run.failed and session.failed but neither run.paused nor approval.requested. */ it("rejects approval request idempotency replays with a different payload", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-approval-replay-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const approvalIdempotencyKey = `${runId}:artifact_invalid_after_repair:${phaseId}:2`; + + await db.insert(approvalRequests).values({ + runId, + phaseId, + gateKey: "artifact_invalid_after_repair", + state: "pending", + idempotencyKey: approvalIdempotencyKey, + payload: { artifactId: "stale-artifact", expectedArtifactPath }, + }); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: + "Scenario: invalid\nRepair-Scenario: invalid\nKeep producing an invalid development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000034", + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); + + const approvals = await db + .select({ + idempotencyKey: approvalRequests.idempotencyKey, + payload: approvalRequests.payload, + }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approvals).toEqual([ + { + idempotencyKey: approvalIdempotencyKey, + payload: { artifactId: "stale-artifact", expectedArtifactPath }, + }, 
+ ]); + + const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); + expect(run).toEqual({ state: "failed" }); + + const [phase] = await db + .select({ attempts: runPhases.attempts, state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ attempts: 2, state: "failed" }); + + const [session] = await db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.runId, runId)); + expect(session).toEqual({ state: "FAILED_NEEDS_HUMAN" }); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toContain("phase.failed"); + expect(events.map((event) => event.type)).toContain("run.failed"); + expect(events.map((event) => event.type)).toContain("session.failed"); + expect(events.map((event) => event.type)).not.toContain("run.paused"); + expect(events.map((event) => event.type)).not.toContain("approval.requested"); + }); + + it("requests a human gate when invalid-artifact repair times out", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-repair-timeout-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: + "Scenario: invalid\nRepair-Scenario: timeout\nRepair never writes the development specification.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000015", + }), + 
).rejects.toMatchObject({ code: "artifact_timeout_exhausted" }); + + const [phase] = await db + .select({ state: runPhases.state, attempts: runPhases.attempts }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ state: "failed", attempts: 2 }); + + const [session] = await db + .select({ lastCaptureSeq: tuiSessions.lastCaptureSeq, state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session?.state).toBe("FAILED_NEEDS_HUMAN"); + expect(session?.lastCaptureSeq).toBeGreaterThan(0n); + + const [approval] = await db + .select({ gateKey: approvalRequests.gateKey, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "artifact_timeout_exhausted", state: "pending" }); + + await expectRunPaused(db, runId); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual( + expect.arrayContaining([ + "artifact.timeout", + "phase.failed", + "run.paused", + "session.failed", + "approval.requested", + ]), + ); + }); + + it("requests a human gate when timeout recovery repair writes an invalid artifact", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-timeout-repair-invalid-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nRepair-Scenario: invalid\nTimeout repair writes invalid.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, 
+ worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000018", + }), + ).rejects.toMatchObject({ code: "artifact_invalid_after_repair" }); + + const [phase] = await db + .select({ state: runPhases.state, attempts: runPhases.attempts }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ state: "failed", attempts: 2 }); + + const [approval] = await db + .select({ gateKey: approvalRequests.gateKey, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "artifact_invalid_after_repair", state: "pending" }); + + await expectRunPaused(db, runId); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.filter((event) => event.type === "phase.started")).toHaveLength(2); + expect(events.map((event) => event.type)).toEqual( + expect.arrayContaining([ + "artifact.timeout", + "artifact.invalid", + "phase.failed", + "run.paused", + "session.failed", + "approval.requested", + ]), + ); + }); + + it("requests a human gate when invalid-artifact repair prompt crashes", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-repair-crash-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: + "Scenario: invalid\nRepair-Scenario: crash\nRepair prompt crashes before writing.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, 
stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000016", + }), + ).rejects.toMatchObject({ code: "prompt_send_exhausted" }); + + const [phase] = await db + .select({ state: runPhases.state, attempts: runPhases.attempts }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ state: "failed", attempts: 2 }); + + const [session] = await db + .select({ lastCaptureSeq: tuiSessions.lastCaptureSeq, state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session?.state).toBe("FAILED_NEEDS_HUMAN"); + expect(session?.lastCaptureSeq).toBeGreaterThan(0n); + + const [approval] = await db + .select({ gateKey: approvalRequests.gateKey, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "prompt_send_exhausted", state: "pending" }); + + await expectRunPaused(db, runId); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual( + expect.arrayContaining([ + "session.crashed", + "phase.failed", + "run.paused", + "session.failed", + "approval.requested", + ]), + ); + }); + + it("requests a timeout gate when timeout-recovery repair prompt crashes", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-timeout-repair-crash-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: timeout\nRepair-Scenario: crash\nTimeout repair crashes.", + 
phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000017", + }), + ).rejects.toMatchObject({ code: "prompt_send_exhausted" }); + + const [phase] = await db + .select({ state: runPhases.state, attempts: runPhases.attempts }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ state: "failed", attempts: 2 }); + + const [session] = await db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session?.state).toBe("FAILED_NEEDS_HUMAN"); + + const [approval] = await db + .select({ gateKey: approvalRequests.gateKey, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "prompt_send_exhausted", state: "pending" }); + + await expectRunPaused(db, runId); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual( + expect.arrayContaining([ + "session.crashed", + "phase.failed", + "run.paused", + "session.failed", + "approval.requested", + ]), + ); + }); + + it("retries prompt send failures before creating a human gate", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync(mkdtempSync(join(tmpdir(), "devflow-fake-phase-crash-"))); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/spec@1", + instructions: "Scenario: crash\nCrash before writing the development specification.", + phaseId, + 
phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 10 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000013", + }), + ).rejects.toMatchObject({ code: "prompt_send_exhausted" }); + + const [phase] = await db + .select({ state: runPhases.state, attempts: runPhases.attempts }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ state: "failed", attempts: 1 }); + + const [session] = await db + .select({ + lastCaptureSeq: tuiSessions.lastCaptureSeq, + recoveryAttempts: tuiSessions.recoveryAttempts, + state: tuiSessions.state, + }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session).toMatchObject({ + recoveryAttempts: 1, + state: "FAILED_NEEDS_HUMAN", + }); + expect(session?.lastCaptureSeq).toBeGreaterThan(0n); + + const [approval] = await db + .select({ gateKey: approvalRequests.gateKey, state: approvalRequests.state }) + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approval).toEqual({ gateKey: "prompt_send_exhausted", state: "pending" }); + + await expectRunPaused(db, runId); + + const events = await db + .select({ seq: runEvents.seq, type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.seq)).toEqual([1n, 2n, 3n, 4n, 5n, 6n, 7n, 8n, 9n]); + expect(events.map((event) => event.type)).toEqual([ + "phase.started", + "session.created", + "session.ready", + "session.busy", + "phase.failed", + "run.paused", + "session.crashed", + "session.failed", + "approval.requested", + ]); + + const chunks = await db + .select({ content: tuiTranscriptChunks.content }) + .from(tuiTranscriptChunks) + .where(eq(tuiTranscriptChunks.sessionId, sessionId)) + .orderBy(tuiTranscriptChunks.seq); + expect(chunks.filter((chunk) => chunk.content.includes("crashing"))).toHaveLength(3); + }); + + it("fails the run and disposes the session on 
fatal artifact validation errors", async () => { + const { db, phaseId, runId } = await createRunAndPhase(); + const worktreeRoot = realpathSync( + mkdtempSync(join(tmpdir(), "devflow-fake-phase-fatal-validation-")), + ); + tempRoots.push(worktreeRoot); + const expectedArtifactPath = join(worktreeRoot, "artifacts", "spec.json"); + const sessionId = randomUUID(); + + await expect( + runSingleFakePhase({ + adapter: new FakeSessionAdapter({ sessionIdFactory: () => sessionId, writeDelayMs: 0 }), + db, + expectedArtifactPath, + expectedSchema: "dev/unknown@1", + instructions: "Scenario: invalid\nWrite an artifact for an unknown schema.", + phaseId, + phaseKey: "implement", + roleId: "implementer", + runId, + worktreeRoot, + wait: { pollIntervalMs: 1, stableMs: 0, timeoutMs: 500 }, + uuidFactory: () => "00000000-0000-4000-8000-000000000021", + }), + ).rejects.toMatchObject({ code: "artifact_schema_unknown" }); + + const [phase] = await db + .select({ state: runPhases.state }) + .from(runPhases) + .where(eq(runPhases.id, phaseId)); + expect(phase).toEqual({ state: "failed" }); + + const [run] = await db.select({ state: runs.state }).from(runs).where(eq(runs.id, runId)); + expect(run).toEqual({ state: "failed" }); + + const [session] = await db + .select({ state: tuiSessions.state }) + .from(tuiSessions) + .where(eq(tuiSessions.id, sessionId)); + expect(session).toEqual({ state: "FAILED_NEEDS_HUMAN" }); + + const approvals = await db + .select() + .from(approvalRequests) + .where(eq(approvalRequests.runId, runId)); + expect(approvals).toEqual([]); + + const events = await db + .select({ type: runEvents.type }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.type)).toEqual([ + "phase.started", + "session.created", + "session.ready", + "session.busy", + "prompt.sent", + "artifact.expected", + "phase.failed", + "run.failed", + "session.failed", + ]); + }); +}); + +async function expectRunPaused(db: 
DbClient["db"], runId: string) {/* Asserts that the run row is in state paused and records executing as the state it was paused from. */ + const [run] = await db + .select({ pausedFromState: runs.pausedFromState, state: runs.state }) + .from(runs) + .where(eq(runs.id, runId)); + expect(run).toEqual({ pausedFromState: "executing", state: "paused" }); +} +/** Asserts that the run row is in state completed and that its endedAt column holds a Date. */ +async function expectRunCompleted(db: DbClient["db"], runId: string) { + const [run] = await db + .select({ endedAt: runs.endedAt, state: runs.state }) + .from(runs) + .where(eq(runs.id, runId)); + expect(run?.state).toBe("completed"); + expect(run?.endedAt).toBeInstanceOf(Date); +} +/** Suite for RunEventRepository. One DbClient is created lazily by createRun and closed again after every test. */ +describe("RunEventRepository", () => { + let client: DbClient | undefined; + const templateIds: string[] = []; + const runIds: string[] = []; +/* Cleanup: a test that never called createRun has no client and nothing to clean. Otherwise the recorded runs are deleted first, then the recorded workflow templates, then the client is closed and both id lists are emptied. */ + afterEach(async () => { + if (client === undefined) { + return; + } + if (runIds.length > 0) { + await client.db.delete(runs).where(inArray(runs.id, [...runIds])); + } + if (templateIds.length > 0) { + await client.db + .delete(workflowTemplates) + .where(inArray(workflowTemplates.id, [...templateIds])); + } + await client.close(); + client = undefined; + runIds.length = 0; + templateIds.length = 0; + }); +/** Inserts a fresh workflow template and a run in state executing, records both ids for cleanup, and returns the db handle together with the run id. */ + async function createRun() { + client ??= createDbClient(databaseUrl); + const templateId = randomUUID(); + const runId = randomUUID(); + templateIds.push(templateId); + runIds.push(runId); + + await client.db.insert(workflowTemplates).values({ + id: templateId, + name: `template-${templateId}`, + version: 1, + hash: `hash-${templateId}`, + definition: {}, + }); + await client.db.insert(runs).values({ + id: runId, + templateId, + templateHash: `hash-${templateId}`, + state: "executing", + repoPath: `/tmp/devflow-${runId}`, + baseBranch: "main", + worktreeRoot: `/tmp/devflow-${runId}/main`, + }); + + return { db: client.db, runId }; + } +/** Calls createRun and then inserts one pending implement phase (seq 1, attempts 0) for that run. */ + async function createRunWithPhase() { + const { db, runId } = await createRun(); + const phaseId = randomUUID(); + await db.insert(runPhases).values({ + id: phaseId, + runId, + phaseKey: "implement", + seq: 1, + state: "pending", + attempts: 0, + }); 
+ + return { db, phaseId, runId }; + } + + it("appends gap-free per-run events and returns existing rows on idempotency replay", async () => { + const { db, phaseId, runId } = await createRunWithPhase(); + const repository = new RunEventRepository(db); + + const first = await repository.append({ + idempotencyKey: `run.started:${runId}`, + payload: { state: "executing" }, + runId, + type: "run.started", + }); + const replayed = await repository.append({ + idempotencyKey: `run.started:${runId}`, + payload: { state: "executing" }, + runId, + type: "run.started", + }); + const second = await repository.append({ + idempotencyKey: `phase.started:${phaseId}:1`, + phaseId, + payload: { attempt: 1, phaseKey: "implement" }, + runId, + type: "phase.started", + }); + + expect(replayed).toEqual(first); + expect([first.seq, second.seq]).toEqual([1n, 2n]); + }); + + it("allows idempotency replay of non-phase-scoped events without a phase id", async () => { + const { db, phaseId, runId } = await createRunWithPhase(); + const repository = new RunEventRepository(db); + const dedupKey = "a".repeat(64); + + const original = await repository.append({ + idempotencyKey: `prompt.sent:${dedupKey}`, + phaseId, + payload: { dedupKey, roleId: "implementer" }, + runId, + type: "prompt.sent", + }); + const replayed = await repository.append({ + idempotencyKey: `prompt.sent:${dedupKey}`, + payload: { dedupKey, roleId: "implementer" }, + runId, + type: "prompt.sent", + }); + + expect(replayed).toEqual(original); + }); + + it("rejects idempotency replays that do not match the existing event", async () => { + const { db, runId } = await createRun(); + const repository = new RunEventRepository(db); + + await repository.append({ + idempotencyKey: `run.started:${runId}`, + payload: { state: "executing" }, + runId, + type: "run.started", + }); + + await expect( + repository.append({ + idempotencyKey: `run.started:${runId}`, + payload: { state: "planning" }, + runId, + type: "run.started", + }), + 
).rejects.toMatchObject({ code: "internal_state_corruption" }); + }); + + it("rejects phase-scoped events without a phase id", async () => { + const { db, runId } = await createRun(); + const repository = new RunEventRepository(db); + + await expect( + repository.append({ + idempotencyKey: "phase.started:00000000-0000-4000-8000-000000000000:1", + payload: { attempt: 1, phaseKey: "implement" }, + runId, + type: "phase.started", + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); + }); + + it("serializes concurrent writers into gap-free per-run sequences", async () => { + const { db, runId } = await createRun(); + const repository = new RunEventRepository(db); + + await Promise.all( + Array.from({ length: 10 }, (_, index) => + repository.append({ + idempotencyKey: `command.completed:00000000-0000-4000-8000-${String(index).padStart(12, "0")}`, + payload: { commandId: `00000000-0000-4000-8000-${String(index).padStart(12, "0")}` }, + runId, + type: "command.completed", + }), + ), + ); + + const events = await db + .select({ seq: runEvents.seq }) + .from(runEvents) + .where(eq(runEvents.runId, runId)) + .orderBy(runEvents.seq); + expect(events.map((event) => event.seq)).toEqual([1n, 2n, 3n, 4n, 5n, 6n, 7n, 8n, 9n, 10n]); + }); + + it("rejects phase-scoped events when the phase belongs to another run", async () => { + const first = await createRun(); + const second = await createRunWithPhase(); + const repository = new RunEventRepository(first.db); + + await expect( + repository.append({ + idempotencyKey: `phase.started:${second.phaseId}:1`, + phaseId: second.phaseId, + payload: { attempt: 1, phaseKey: "implement" }, + runId: first.runId, + type: "phase.started", + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); + }); + + it("rejects event idempotency keys that do not match the locked formula", async () => { + const { db, runId } = await createRun(); + const repository = new RunEventRepository(db); + + await expect( + 
repository.append({ + idempotencyKey: "run.started:test", + payload: { state: "executing" }, + runId, + type: "run.started", + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); + }); + + it("requires review batch events to be phase scoped with reviewer and attempt", async () => { + const { db, phaseId, runId } = await createRunWithPhase(); + const repository = new RunEventRepository(db); + + await expect( + repository.append({ + idempotencyKey: `review.batch_recorded:${phaseId}:reviewer:1`, + payload: { attempt: 1, reviewerRole: "reviewer" }, + runId, + type: "review.batch_recorded", + }), + ).rejects.toMatchObject({ code: "internal_state_corruption" }); + + const event = await repository.append({ + idempotencyKey: `review.batch_recorded:${phaseId}:reviewer:1`, + phaseId, + payload: { attempt: 1, reviewerRole: "reviewer" }, + runId, + type: "review.batch_recorded", + }); + expect(event.seq).toBe(1n); + }); +}); diff --git a/packages/run-engine/src/fake-phase-harness.ts b/packages/run-engine/src/fake-phase-harness.ts new file mode 100644 index 0000000..0a3fece --- /dev/null +++ b/packages/run-engine/src/fake-phase-harness.ts @@ -0,0 +1,2022 @@ +import { createHash, randomUUID } from "node:crypto"; +import { existsSync, realpathSync } from "node:fs"; +import { readFile, stat, unlink } from "node:fs/promises"; +import { basename, dirname, isAbsolute, relative, resolve } from "node:path"; + +import { + DevflowError, + type PromptEnvelope, + canonicalize, + hash, + validateArtifact, +} from "@devflow/core"; +import { + type DbClient, + RunEventRepository, + TuiTranscriptRepository, + approvalRequests, + artifacts, + runEvents, + runPhases, + runs, + tuiSessions, +} from "@devflow/db"; +import { + type SessionAdapter, + type SessionHandle, + type TranscriptChunkSink, + captureAndPersistTranscript, +} from "@devflow/session"; +import { and, desc, eq, inArray, sql } from "drizzle-orm"; + +export interface FakePhaseWaitOptions { + timeoutMs?: number; + 
pollIntervalMs?: number; + stableMs?: number; +} +/** Wait options for a single artifact wait. NOTE(review): ignoreInitialSignature is presumably the signature of an artifact that already existed before the prompt and must not count as new output - its consumer is outside this chunk, confirm there. */ +interface ArtifactWaitOptions extends FakePhaseWaitOptions { + ignoreInitialSignature?: string; +} +/** Everything runSingleFakePhase needs: the database handle, the session adapter, the run, phase and role identity, the worktree root, the expected artifact path and schema, and the prompt instructions. wait, uuidFactory and transcriptSink are optional. */ +export interface RunSingleFakePhaseInput { + db: DbClient["db"]; + adapter: SessionAdapter; + runId: string; + phaseId: string; + phaseKey: string; + roleId: string; + worktreeRoot: string; + expectedArtifactPath: string; + expectedSchema: string; + instructions: string; + wait?: FakePhaseWaitOptions; + uuidFactory?: () => string; + transcriptSink?: TranscriptChunkSink; +} +/** Value runSingleFakePhase resolves with: session, prompt and artifact identifiers, the artifact hash and validity flag, and the captured count reported by the final transcript capture. */ +export interface RunSingleFakePhaseResult { + sessionId: string; + promptId: string; + artifactId: string; + artifactHash: string; + artifactValid: boolean; + transcriptCaptured: number; +} +/** Transaction handle type: the first argument of the callback accepted by db.transaction. NOTE(review): the type arguments were restored from a garbled line - confirm against the repository. */ +type TransactionDb = Parameters<Parameters<DbClient["db"]["transaction"]>[0]>[0]; +/** Retry budget for prompt sends. NOTE(review): not referenced inside this chunk; presumably the plan value sendPrompt retry 2, meaning one initial send plus two retries - confirm at the call site. */ +const sendPromptRetryBudget = 2; +/** How runSingleFakePhase enters a phase: the attempt number, an optional already-started session handle, and the replay flags and values that select between sending a prompt, resuming an artifact wait, re-validating the current artifact, or reusing a replayed outcome. */ +interface PhaseEntry { + attempt: number; + continueArtifactWait: boolean; + continueValidation: boolean; + promptId?: string; + repairAttemptUsed: boolean; + replayedOutcome?: ArtifactOutcome; + resumedPrompt: boolean; + handle?: SessionHandle; +} +/** Returns a copy of input whose worktreeRoot is resolved through realpathSync and whose expectedArtifactPath is canonicalized against it. realpathSync throws when the worktree root does not exist. */ +function canonicalizeRunSingleFakePhaseInput( + input: RunSingleFakePhaseInput, +): RunSingleFakePhaseInput { + const rawWorktreeRoot = resolve(input.worktreeRoot); + const worktreeRoot = realpathSync(rawWorktreeRoot); + const expectedArtifactPath = canonicalizePathAgainstWorktree( + input.expectedArtifactPath, + rawWorktreeRoot, + worktreeRoot, + ); + + return { ...input, expectedArtifactPath, worktreeRoot }; +} +/** Canonicalizes path. A path located inside the raw worktree root is first re-rooted onto the canonical root; any other path is canonicalized as given. */ +function canonicalizePathAgainstWorktree( + path: string, + rawWorktreeRoot: string, + canonicalWorktreeRoot: string, +): string { + const absolutePath = resolve(path); + const relativeToWorktree = relative(rawWorktreeRoot, absolutePath); + if (!relativeToWorktree.startsWith("..") && !isAbsolute(relativeToWorktree)) { + return canonicalizePossiblyMissingPath(resolve(canonicalWorktreeRoot, relativeToWorktree)); + } + + return canonicalizePossiblyMissingPath(absolutePath); +} +/** Resolves symlinks in the longest existing prefix of path and re-appends the segments that do not exist yet; returns the plain resolved path when no ancestor exists. */ +function 
canonicalizePossiblyMissingPath(path: string): string { + const missingSegments: string[] = []; + let current = resolve(path); +/* Walk up to the nearest existing ancestor, remembering the missing tail; give up once dirname stops changing the path (the filesystem root). */ + while (!existsSync(current)) { + const parent = dirname(current); + if (parent === current) { + return resolve(path); + } + missingSegments.unshift(basename(current)); + current = parent; + } + + return resolve(realpathSync(current), ...missingSegments); +} +/** Runs one phase end to end against the given session adapter. Steps: canonicalize the input paths, enter or replay the phase, start a session unless the phase entry already carries one, send the prompt, wait for and validate the expected artifact, and allow a single repair attempt after an artifact timeout or an invalid artifact. Resolves with the session, prompt and artifact identifiers once a valid artifact completes the phase. Every failure path first records its outcome (a run failure, or a phase failure plus a human gate request), captures the transcript when a session exists, and then throws. */ +export async function runSingleFakePhase( + rawInput: RunSingleFakePhaseInput, +): Promise<RunSingleFakePhaseResult> { + const input = canonicalizeRunSingleFakePhaseInput(rawInput); + const eventRepository = new RunEventRepository(input.db); + const phaseEntry = await enterInitialPhase(input, eventRepository); + const attempt = phaseEntry.attempt; +/* Reuse the session handle carried by the phase entry; otherwise remove any stale artifact and start a new session. No session exists yet if that removal fails, so only the phase and run are failed. */ let handle: SessionHandle; + if (phaseEntry.handle !== undefined) { + handle = phaseEntry.handle; + } else { + try { + await removeStaleArtifact(input); + } catch (error) { + await failPhaseAndRun(input, eventRepository, attempt, "stale_artifact_remove_failed"); + throw error; + } + + handle = await startSessionAndRecord(input, eventRepository, attempt); + } +/* A phase entry that already used its repair attempt gets the repair instructions and the prompt.repaired event type. */ + const activeInstructions = phaseEntry.repairAttemptUsed + ? repairInstructionsFor(input.instructions) + : input.instructions; + const envelope = buildEnvelope(input, attempt, activeInstructions); + const promptEventType = phaseEntry.repairAttemptUsed ? 
"prompt.repaired" : "prompt.sent"; + let repairAttemptUsed = phaseEntry.repairAttemptUsed; + let promptSend: PromptSendRecord | undefined; + let promptId: string; + let outcome: ArtifactOutcome | undefined = phaseEntry.replayedOutcome; + let promptDedupKeyForIdle = envelope.dedupKey; + let initialPromptIdleRecorded = false; +/* Entry modes: (1) replayed outcome - nothing is sent and the session is marked idle right away, (2) resume an artifact wait with no baseline signature, (3) re-validate the current artifact, (4) send the prompt. Modes 1 to 3 require the prompt id stored on the phase entry. */ if (phaseEntry.replayedOutcome !== undefined) { + promptId = requirePhaseEntryPromptId(input, phaseEntry, "Replayed artifact entry"); + promptDedupKeyForIdle = promptId; + await markSessionIdle(input, eventRepository, handle.sessionId, promptId); + initialPromptIdleRecorded = true; + } else if (phaseEntry.continueArtifactWait) { + promptId = requirePhaseEntryPromptId(input, phaseEntry, "Artifact wait replay"); + promptDedupKeyForIdle = promptId; + promptSend = { promptId, artifactBaselineSignature: undefined }; + } else if (phaseEntry.continueValidation) { + promptId = requirePhaseEntryPromptId(input, phaseEntry, "Artifact validation replay"); + promptDedupKeyForIdle = promptId; + } else { + try { + promptSend = await sendPromptAndRecord( + input, + eventRepository, + handle, + envelope, + promptEventType, + { captureArtifactBaseline: !phaseEntry.resumedPrompt }, + ); + promptId = promptSend.promptId; + } catch (error) { +/* Errors accepted by shouldCreateHumanGate fail the phase and request a human gate; all other errors fail the run and dispose the session. */ if (shouldCreateHumanGate(error)) { + const gateError = toHumanRequiredRecoveryError(error); + await failPhaseAndRequestGate( + input, + eventRepository, + attempt, + "prompt_send_failed", + gateError.code, + { errorCode: error.code, recoveryHint: gateError.recoveryHint }, + handle.sessionId, + { markSessionCrashed: true }, + ); + await captureTranscript(input, handle); + throw gateError; + } + await failRunAndDisposeSession(input, eventRepository, attempt, "prompt_send_failed", handle); + await captureTranscript(input, handle); + throw error; + } + } +/* Invariant: without a replayed outcome, only the re-validation mode may reach the next step without prompt send state. */ + if (outcome === undefined && promptSend === undefined && !phaseEntry.continueValidation) { + throw new DevflowError("Prompt send state missing before artifact wait", { + 
class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + phaseId: input.phaseId, + }); + } +/* Obtain the artifact outcome: validate in place when resuming validation, otherwise wait for the artifact and validate it. */ + if (outcome === undefined) { + try { + outcome = phaseEntry.continueValidation + ? await validateCurrentArtifact(input, eventRepository, attempt) + : await waitForAndValidateArtifact( + input, + eventRepository, + attempt, + handle.sessionId, + promptSend?.artifactBaselineSignature, + ); + } catch (error) { +/* Only artifact_timeout_exhausted is recoverable here; any other error fails the run. */ if (!isDevflowErrorWithCode(error, "artifact_timeout_exhausted")) { + await failRunAndDisposeSession( + input, + eventRepository, + attempt, + "artifact_validation_failed", + handle, + ); + await captureTranscript(input, handle); + throw error; + } +/* Timeout recovery. A recovery error, a recovery that reports false, or an already spent repair attempt each end the phase (human gate or run failure) instead of starting a repair. */ + let recovered: boolean; + try { + recovered = await recoverFromArtifactTimeout(input, eventRepository, handle.sessionId); + } catch (recoveryError) { + if (shouldCreateHumanGate(recoveryError)) { + const gateError = toArtifactTimeoutRecoveryGateError(recoveryError); + await failPhaseAndRequestGate( + input, + eventRepository, + attempt, + "artifact_timeout_recovery_failed", + gateError.code, + { + errorCode: recoveryError.code, + expectedArtifactPath: input.expectedArtifactPath, + recoveryHint: gateError.recoveryHint, + }, + handle.sessionId, + ); + await captureTranscript(input, handle); + throw gateError; + } + await failRunAndDisposeSession( + input, + eventRepository, + attempt, + "artifact_timeout_recovery_failed", + handle, + ); + await captureTranscript(input, handle); + throw recoveryError; + } + if (!recovered) { + await failPhaseAndRequestGate( + input, + eventRepository, + attempt, + "artifact_timeout", + "artifact_timeout_exhausted", + { + expectedArtifactPath: input.expectedArtifactPath, + }, + handle.sessionId, + { markSessionCrashed: true }, + ); + await captureTranscript(input, handle); + throw error; + } + if (repairAttemptUsed) { + await failPhaseAndRequestGate( + input, + eventRepository, + attempt, + "artifact_timeout", + "artifact_timeout_exhausted", + { + expectedArtifactPath: 
input.expectedArtifactPath, + }, + handle.sessionId, + ); + await captureTranscript(input, handle); + throw error; + } +/* Timeout repair: start a new attempt from awaiting_artifact, spend the single repair attempt, remove the stale artifact, and send the repair prompt as prompt.repaired. */ + const timeoutRepairAttempt = await startPhaseAndRecord( + input, + eventRepository, + ["awaiting_artifact"], + { + reason: "artifact_timeout", + repair: true, + }, + ); + repairAttemptUsed = true; + try { + await removeStaleArtifact(input); + } catch (error) { + await failRunAndDisposeSession( + input, + eventRepository, + timeoutRepairAttempt, + "stale_artifact_remove_failed", + handle, + ); + await captureTranscript(input, handle); + throw error; + } + const timeoutRepairEnvelope = buildEnvelope( + input, + timeoutRepairAttempt, + repairInstructionsFor(input.instructions), + ); + try { + promptSend = await sendPromptAndRecord( + input, + eventRepository, + handle, + timeoutRepairEnvelope, + "prompt.repaired", + ); + promptId = promptSend.promptId; + } catch (repairError) { + if (!shouldCreateHumanGate(repairError)) { + await failRunAndDisposeSession( + input, + eventRepository, + timeoutRepairAttempt, + "prompt_send_failed", + handle, + ); + await captureTranscript(input, handle); + throw repairError; + } + const gateError = toHumanRequiredRecoveryError(repairError); + await failPhaseAndRequestGate( + input, + eventRepository, + timeoutRepairAttempt, + "prompt_send_failed", + gateError.code, + { + errorCode: repairError.code, + expectedArtifactPath: input.expectedArtifactPath, + recoveryHint: gateError.recoveryHint, + }, + handle.sessionId, + { markSessionCrashed: true }, + ); + await captureTranscript(input, handle); + throw gateError; + } +/* Wait for the repaired artifact; a second timeout requests the timeout gate, any other error fails the run. */ + try { + outcome = await waitForAndValidateArtifact( + input, + eventRepository, + timeoutRepairAttempt, + handle.sessionId, + promptSend.artifactBaselineSignature, + ); + } catch (repairError) { + if (!isDevflowErrorWithCode(repairError, "artifact_timeout_exhausted")) { + await failRunAndDisposeSession( + input, + eventRepository, + timeoutRepairAttempt, + "artifact_repair_failed", + handle, + ); + 
await captureTranscript(input, handle); + throw repairError; + } + await failPhaseAndRequestGate( + input, + eventRepository, + timeoutRepairAttempt, + "artifact_timeout", + "artifact_timeout_exhausted", + { + expectedArtifactPath: input.expectedArtifactPath, + }, + handle.sessionId, + ); + await captureTranscript(input, handle); + throw repairError; + } + await markSessionIdle( + input, + eventRepository, + handle.sessionId, + timeoutRepairEnvelope.dedupKey, + ); + } + } + if (outcome === undefined) { + throw new DevflowError("Artifact outcome missing after fake phase wait", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + phaseId: input.phaseId, + }); + } +/* Mark the session idle for the entry attempt unless that was already recorded. NOTE(review): an outcome produced by the timeout repair presumably carries the repair attempt number and therefore skips this - confirm in waitForAndValidateArtifact. */ if (outcome.attempt === attempt && !initialPromptIdleRecorded) { + await markSessionIdle(input, eventRepository, handle.sessionId, promptDedupKeyForIdle); + } +/* Invalid artifact: request the artifact_invalid_after_repair gate when the repair attempt is already spent, otherwise run one repair attempt starting from the validating state. */ + if (!outcome.validation.ok) { + if (repairAttemptUsed) { + await failPhaseAndRequestGate( + input, + eventRepository, + outcome.attempt, + "artifact_invalid", + "artifact_invalid_after_repair", + { + artifactId: outcome.artifact.id, + expectedArtifactPath: input.expectedArtifactPath, + }, + handle.sessionId, + ); + await captureTranscript(input, handle); + throw new DevflowError("Artifact remained invalid after repair", { + class: "human_required", + code: "artifact_invalid_after_repair", + runId: input.runId, + phaseId: input.phaseId, + }); + } + + const repairAttempt = await startPhaseAndRecord(input, eventRepository, ["validating"], { + repair: true, + }); + repairAttemptUsed = true; + try { + await removeStaleArtifact(input); + } catch (error) { + await failRunAndDisposeSession( + input, + eventRepository, + repairAttempt, + "stale_artifact_remove_failed", + handle, + ); + await captureTranscript(input, handle); + throw error; + } + const repairEnvelope = buildEnvelope( + input, + repairAttempt, + repairInstructionsFor(input.instructions), + ); + try { + promptSend = await sendPromptAndRecord( + input, + 
eventRepository, + handle, + repairEnvelope, + "prompt.repaired", + ); + promptId = promptSend.promptId; + } catch (error) { + if (!shouldCreateHumanGate(error)) { + await failRunAndDisposeSession( + input, + eventRepository, + repairAttempt, + "prompt_send_failed", + handle, + ); + await captureTranscript(input, handle); + throw error; + } + const gateError = toHumanRequiredRecoveryError(error); + await failPhaseAndRequestGate( + input, + eventRepository, + repairAttempt, + "prompt_send_failed", + gateError.code, + { + errorCode: error.code, + expectedArtifactPath: input.expectedArtifactPath, + recoveryHint: gateError.recoveryHint, + }, + handle.sessionId, + { markSessionCrashed: true }, + ); + await captureTranscript(input, handle); + throw gateError; + } + try { + outcome = await waitForAndValidateArtifact( + input, + eventRepository, + repairAttempt, + handle.sessionId, + promptSend.artifactBaselineSignature, + ); + } catch (error) { + if (!isDevflowErrorWithCode(error, "artifact_timeout_exhausted")) { + await failRunAndDisposeSession( + input, + eventRepository, + repairAttempt, + "artifact_repair_failed", + handle, + ); + await captureTranscript(input, handle); + throw error; + } + await failPhaseAndRequestGate( + input, + eventRepository, + repairAttempt, + "artifact_timeout", + "artifact_timeout_exhausted", + { + expectedArtifactPath: input.expectedArtifactPath, + }, + handle.sessionId, + ); + await captureTranscript(input, handle); + throw error; + } + await markSessionIdle(input, eventRepository, handle.sessionId, repairEnvelope.dedupKey); + } +/* Final decision after any repair: complete the phase and run on a valid artifact, otherwise request the artifact_invalid_after_repair gate and throw. */ + if (outcome.validation.ok) { + await completePhaseAndRun(input, eventRepository, outcome.attempt, handle.sessionId); + } else { + await failPhaseAndRequestGate( + input, + eventRepository, + outcome.attempt, + "artifact_invalid", + "artifact_invalid_after_repair", + { + artifactId: outcome.artifact.id, + expectedArtifactPath: input.expectedArtifactPath, + }, + handle.sessionId, + ); + await 
/**
 * Decides how a fake phase run begins: as a fresh first attempt, or as a replay
 * of an attempt that an earlier invocation left unfinished.
 *
 * A `pending` phase is started normally. Otherwise the persisted phase row, the
 * `phase.started` event of its latest attempt, the role's session row and
 * (while `validating`) the newest matching artifact row are compared against
 * `input` to pick the resume point.
 *
 * @param input - Run, phase, role and expected-artifact description.
 * @param eventRepository - Sink for run events.
 * @returns The entry describing the attempt number and where to resume.
 * @throws DevflowError (`fatal`, `internal_state_corruption`) when the phase
 *   row is missing or the persisted state cannot be replayed.
 */
async function enterInitialPhase(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
): Promise<PhaseEntry> {
  const attempt = await tryStartPhaseAndRecord(input, eventRepository, ["pending"]);
  if (attempt !== undefined) {
    return {
      attempt,
      continueArtifactWait: false,
      continueValidation: false,
      repairAttemptUsed: false,
      resumedPrompt: false,
    };
  }

  const [phase] = await input.db
    .select({ attempts: runPhases.attempts, state: runPhases.state })
    .from(runPhases)
    .where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId)));
  if (phase === undefined) {
    throw new DevflowError("Run phase does not exist", {
      class: "fatal",
      code: "internal_state_corruption",
      runId: input.runId,
      phaseId: input.phaseId,
    });
  }

  // Only an attempt that was already started can be replayed; reject every
  // other state before touching the event log.
  const replayableState =
    phase.state === "running" ||
    phase.state === "awaiting_artifact" ||
    phase.state === "validating";
  if (!replayableState || !(phase.attempts > 0)) {
    throw cannotReplayPhase(input, phase.state);
  }

  const phaseStart = await phaseStartReplayMetadata(input, phase.attempts);
  if (phaseStart === undefined) {
    throw cannotReplayPhase(input, phase.state);
  }
  const resumedAttempt = phase.attempts;
  const { repairAttemptUsed } = phaseStart;

  if (phase.state === "running") {
    const envelope = buildReplayEnvelope(input, resumedAttempt, repairAttemptUsed);
    const session = await loadReplaySession(input);
    if (session === undefined) {
      // No session row yet: the caller has to start one for this attempt.
      return {
        attempt: resumedAttempt,
        continueArtifactWait: false,
        continueValidation: false,
        repairAttemptUsed,
        resumedPrompt: false,
      };
    }
    const expectsSameArtifact =
      session.expectedArtifactPath === input.expectedArtifactPath &&
      session.expectedSchema === input.expectedSchema;
    if (expectsSameArtifact && ["CREATED", "BOOTSTRAPPING", "READY"].includes(session.state)) {
      // Session exists but is not marked BUSY: reuse it without treating the
      // prompt as already sent.
      return {
        attempt: resumedAttempt,
        continueArtifactWait: false,
        continueValidation: false,
        repairAttemptUsed,
        resumedPrompt: false,
        handle: { sessionId: session.id },
      };
    }
    if (
      session.state === "BUSY" &&
      session.lastPromptHash === envelope.dedupKey &&
      expectsSameArtifact
    ) {
      // The session is BUSY with exactly this attempt's prompt.
      return {
        attempt: resumedAttempt,
        continueArtifactWait: false,
        continueValidation: false,
        repairAttemptUsed,
        resumedPrompt: true,
        handle: { sessionId: session.id },
      };
    }
    throw cannotReplayPhase(input, phase.state);
  }

  if (phase.state === "awaiting_artifact") {
    const envelope = buildReplayEnvelope(input, resumedAttempt, repairAttemptUsed);
    const session = await loadReplaySession(input);
    if (
      session !== undefined &&
      session.state !== "FAILED_NEEDS_HUMAN" &&
      session.lastPromptHash === envelope.dedupKey &&
      session.expectedArtifactPath === input.expectedArtifactPath &&
      session.expectedSchema === input.expectedSchema
    ) {
      return {
        attempt: resumedAttempt,
        continueArtifactWait: true,
        continueValidation: false,
        promptId: session.lastPromptHash,
        repairAttemptUsed,
        resumedPrompt: true,
        handle: { sessionId: session.id },
      };
    }
    throw cannotReplayPhase(input, phase.state);
  }

  // Only `validating` remains. The artifact row is read before the envelope is
  // rebuilt, matching the original statement order.
  const [artifact] = await input.db
    .select({
      createdAt: artifacts.createdAt,
      hash: artifacts.hash,
      id: artifacts.id,
      path: artifacts.path,
      valid: artifacts.valid,
      validationError: artifacts.validationError,
    })
    .from(artifacts)
    .where(
      and(
        eq(artifacts.runId, input.runId),
        eq(artifacts.phaseId, input.phaseId),
        eq(artifacts.path, input.expectedArtifactPath),
        eq(artifacts.schemaId, input.expectedSchema),
      ),
    )
    .orderBy(desc(artifacts.createdAt))
    .limit(1);
  const envelope = buildReplayEnvelope(input, resumedAttempt, repairAttemptUsed);
  const session = await loadReplaySession(input);
  // Both resume paths below require a session whose last prompt is this
  // attempt's prompt.
  if (session === undefined || session.lastPromptHash !== envelope.dedupKey) {
    throw cannotReplayPhase(input, phase.state);
  }

  if (artifact !== undefined && artifact.createdAt >= (session.lastPromptAt ?? new Date(0))) {
    // An artifact row at least as new as the prompt exists: replay its stored
    // verdict instead of validating again.
    const validation = persistedArtifactValidation(input, artifact);
    const eventType = validation.ok ? "artifact.validated" : "artifact.invalid";
    await eventRepository.append({
      runId: input.runId,
      phaseId: input.phaseId,
      type: eventType,
      payload: validation.ok
        ? {
            artifactId: artifact.id,
            hash: artifact.hash,
            path: artifact.path,
            schemaId: input.expectedSchema,
          }
        : {
            artifactId: artifact.id,
            errors: validation.errors,
            hash: artifact.hash,
            path: artifact.path,
            schemaId: input.expectedSchema,
          },
      idempotencyKey: `${eventType}:${input.phaseId}:${artifact.path}:${artifact.hash}`,
    });
    return {
      attempt: resumedAttempt,
      continueArtifactWait: false,
      continueValidation: false,
      promptId: session.lastPromptHash,
      repairAttemptUsed,
      replayedOutcome: {
        attempt: resumedAttempt,
        artifact: { id: artifact.id },
        artifactHash: artifact.hash,
        validation,
      },
      resumedPrompt: false,
      handle: { sessionId: session.id },
    };
  }

  if (
    session.expectedArtifactPath === input.expectedArtifactPath &&
    session.expectedSchema === input.expectedSchema
  ) {
    return {
      attempt: resumedAttempt,
      continueArtifactWait: false,
      continueValidation: true,
      promptId: session.lastPromptHash,
      repairAttemptUsed,
      resumedPrompt: true,
      handle: { sessionId: session.id },
    };
  }

  throw cannotReplayPhase(input, phase.state);
}

/**
 * Rebuilds the prompt envelope of a replayed attempt, using the repair
 * instructions when that attempt was started as a repair.
 */
function buildReplayEnvelope(
  input: RunSingleFakePhaseInput,
  attempt: number,
  repairAttemptUsed: boolean,
): PromptEnvelope {
  const instructions = repairAttemptUsed
    ? repairInstructionsFor(input.instructions)
    : input.instructions;
  return buildEnvelope(input, attempt, instructions);
}

/** Reads the session row of `input.roleId` in `input.runId`, or `undefined` when none exists. */
async function loadReplaySession(input: RunSingleFakePhaseInput) {
  const [session] = await input.db
    .select({
      expectedArtifactPath: tuiSessions.expectedArtifactPath,
      expectedSchema: tuiSessions.expectedSchema,
      id: tuiSessions.id,
      lastPromptHash: tuiSessions.lastPromptHash,
      lastPromptAt: tuiSessions.lastPromptAt,
      state: tuiSessions.state,
    })
    .from(tuiSessions)
    .where(and(eq(tuiSessions.runId, input.runId), eq(tuiSessions.roleId, input.roleId)));
  return session;
}

/**
 * Builds (does not throw) the fatal error reported when a phase cannot be
 * started or replayed from `phaseState`; the state is carried in `recoveryHint`.
 */
function cannotReplayPhase(input: RunSingleFakePhaseInput, phaseState: string): DevflowError {
  return new DevflowError("Cannot start a fake phase from the current phase state", {
    class: "fatal",
    code: "internal_state_corruption",
    runId: input.runId,
    phaseId: input.phaseId,
    recoveryHint: `phase_state=${phaseState}`,
  });
}

/**
 * Returns the prompt id carried by `phaseEntry`.
 *
 * @param context - Prefix of the error message naming the calling situation.
 * @throws DevflowError (`fatal`, `internal_state_corruption`) when the entry
 *   has no prompt id.
 */
function requirePhaseEntryPromptId(
  input: RunSingleFakePhaseInput,
  phaseEntry: PhaseEntry,
  context: string,
): string {
  if (phaseEntry.promptId !== undefined) {
    return phaseEntry.promptId;
  }

  throw new DevflowError(`${context} did not include a prompt id`, {
    class: "fatal",
    code: "internal_state_corruption",
    runId: input.runId,
    phaseId: input.phaseId,
  });
}

/** What the `phase.started` event of an attempt reveals for replay purposes. */
interface PhaseStartReplayMetadata {
  /** True when the event payload carried `repair: true`. */
  repairAttemptUsed: boolean;
}
/**
 * Looks up the `phase.started` event recorded for `attempt` and reports whether
 * that attempt was started as a repair.
 *
 * @returns `undefined` when no such event exists; otherwise
 *   `repairAttemptUsed` is true only if the payload is an object whose
 *   `repair` property is exactly `true`.
 */
async function phaseStartReplayMetadata(
  input: RunSingleFakePhaseInput,
  attempt: number,
): Promise<PhaseStartReplayMetadata | undefined> {
  const startedKey = `phase.started:${input.phaseId}:${attempt}`;
  const matches = await input.db
    .select({ payload: runEvents.payload })
    .from(runEvents)
    .where(
      and(
        eq(runEvents.runId, input.runId),
        eq(runEvents.phaseId, input.phaseId),
        eq(runEvents.type, "phase.started"),
        eq(runEvents.idempotencyKey, startedKey),
      ),
    )
    .limit(1);
  const startedEvent = matches[0];
  if (startedEvent === undefined) {
    return undefined;
  }

  const { payload } = startedEvent;
  if (typeof payload !== "object" || payload === null) {
    return { repairAttemptUsed: false };
  }
  return { repairAttemptUsed: "repair" in payload && payload.repair === true };
}

/** The persisted artifact columns needed to rebuild a validation verdict. */
interface PersistedArtifactReplay {
  valid: boolean;
  validationError: unknown;
}

/**
 * Rebuilds a validation result from a stored artifact row.
 *
 * @returns `{ ok: true }` for a valid row; for an invalid row, the stored
 *   `errors` array.
 * @throws DevflowError (`fatal`, `internal_state_corruption`) when an invalid
 *   row has no `errors` array in `validationError`.
 */
function persistedArtifactValidation(
  input: RunSingleFakePhaseInput,
  artifact: PersistedArtifactReplay,
): ReturnType<typeof validateArtifact> {
  if (artifact.valid) {
    return { ok: true };
  }

  const stored = artifact.validationError;
  const hasErrorList =
    typeof stored === "object" &&
    stored !== null &&
    "errors" in stored &&
    Array.isArray(stored.errors);
  if (!hasErrorList) {
    throw new DevflowError("Invalid artifact replay is missing validation errors", {
      class: "fatal",
      code: "internal_state_corruption",
      runId: input.runId,
      phaseId: input.phaseId,
    });
  }
  return { ok: false, errors: stored.errors };
}

/**
 * Starts a new attempt of the phase and insists that it succeeds.
 *
 * @param allowedCurrentStates - Phase states from which a start is permitted.
 * @param payload - Extra fields merged into the `phase.started` event payload.
 * @returns The attempt number after the increment.
 * @throws DevflowError (`fatal`, `internal_state_corruption`) when the phase
 *   row is missing or is in a state outside `allowedCurrentStates`.
 */
async function startPhaseAndRecord(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  allowedCurrentStates: readonly string[],
  payload: Record<string, unknown> = {},
): Promise<number> {
  const startedAttempt = await tryStartPhaseAndRecord(
    input,
    eventRepository,
    allowedCurrentStates,
    payload,
  );
  if (startedAttempt === undefined) {
    // The conditional update matched nothing: report why.
    const [current] = await input.db
      .select({ state: runPhases.state })
      .from(runPhases)
      .where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId)));
    if (current === undefined) {
      throw new DevflowError("Run phase does not exist", {
        class: "fatal",
        code: "internal_state_corruption",
        runId: input.runId,
        phaseId: input.phaseId,
      });
    }

    throw new DevflowError("Cannot start a fake phase from the current phase state", {
      class: "fatal",
      code: "internal_state_corruption",
      runId: input.runId,
      phaseId: input.phaseId,
      recoveryHint: `phase_state=${current.state}`,
    });
  }

  return startedAttempt;
}

/**
 * In one transaction: locks the run row, requires the run to be `executing`,
 * and moves the phase to `running` with `attempts + 1` if its current state is
 * one of `allowedCurrentStates`, appending the matching `phase.started` event.
 *
 * @returns The new attempt number, or `undefined` when the phase exists but is
 *   not in an allowed state.
 * @throws DevflowError (`fatal`, `internal_state_corruption`) when the run is
 *   missing or not executing, or when the phase row does not exist.
 */
async function tryStartPhaseAndRecord(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  allowedCurrentStates: readonly string[],
  payload: Record<string, unknown> = {},
): Promise<number | undefined> {
  return input.db.transaction(async (tx) => {
    // Row lock on the run for the rest of this transaction.
    await tx.execute(sql`SELECT 1 FROM ${runs} WHERE ${runs.id} = ${input.runId} FOR UPDATE`);
    const runRows = await tx
      .select({ state: runs.state })
      .from(runs)
      .where(eq(runs.id, input.runId));
    const runRow = runRows[0];
    if (runRow === undefined || runRow.state !== "executing") {
      throw new DevflowError("Cannot start a fake phase unless the run is executing", {
        class: "fatal",
        code: "internal_state_corruption",
        runId: input.runId,
        phaseId: input.phaseId,
      });
    }

    const [started] = await tx
      .update(runPhases)
      .set({
        attempts: sql`${runPhases.attempts} + 1`,
        state: "running",
        startedAt: new Date(),
      })
      .where(
        and(
          eq(runPhases.id, input.phaseId),
          eq(runPhases.runId, input.runId),
          inArray(runPhases.state, [...allowedCurrentStates]),
        ),
      )
      .returning({ attempts: runPhases.attempts });

    if (started === undefined) {
      // Distinguish "wrong state" (undefined) from "no such phase" (fatal).
      const [existing] = await tx
        .select({ id: runPhases.id })
        .from(runPhases)
        .where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId)));
      if (existing === undefined) {
        throw new DevflowError("Run phase does not exist", {
          class: "fatal",
          code: "internal_state_corruption",
          runId: input.runId,
          phaseId: input.phaseId,
        });
      }
      return undefined;
    }

    const newAttempt = started.attempts;
    await eventRepository.appendInTransaction(tx, {
      runId: input.runId,
      phaseId: input.phaseId,
      type: "phase.started",
      payload: { phaseKey: input.phaseKey, attempt: newAttempt, ...payload },
      idempotencyKey: `phase.started:${input.phaseId}:${newAttempt}`,
    });
    return newAttempt;
  });
}
/**
 * Fails the phase and hands the run over to a human.
 *
 * In a single transaction this ensures a human gate request exists, marks the
 * phase `failed`, pauses the run unless it is already `paused`, optionally
 * records the session as `CRASHED` (bumping `recoveryAttempts`), marks the
 * session `FAILED_NEEDS_HUMAN`, and appends the gate-requested event.
 *
 * If that transaction throws, the phase and run are failed with reason
 * `approval_request_failed`, the session (if any) is marked as needing a
 * human, and the transaction's error is rethrown.
 *
 * @param reason - Recorded in the `phase.failed` payload.
 * @param gateKey - Identifies the gate; also part of the pause cause.
 * @param payload - Passed to the gate request.
 * @param sessionId - Session to mark failed; omitted when none exists.
 * @param options - `markSessionCrashed` adds the `session.crashed` step.
 */
async function failPhaseAndRequestGate(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  attempt: number,
  reason: string,
  gateKey: string,
  payload: Record<string, unknown>,
  sessionId?: string,
  options: { markSessionCrashed?: boolean } = {},
) {
  try {
    await input.db.transaction(async (tx) => {
      const gateRequest = await ensureHumanGateRequestInTransaction(
        input,
        tx,
        gateKey,
        attempt,
        payload,
      );

      await tx
        .update(runPhases)
        .set({ state: "failed", endedAt: new Date() })
        .where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId)));
      await eventRepository.appendInTransaction(tx, {
        runId: input.runId,
        phaseId: input.phaseId,
        type: "phase.failed",
        payload: { phaseKey: input.phaseKey, attempt, reason },
        idempotencyKey: `phase.failed:${input.phaseId}:${attempt}`,
      });

      const runRows = await tx
        .select({ state: runs.state })
        .from(runs)
        .where(eq(runs.id, input.runId));
      const runRow = runRows[0];
      if (runRow === undefined) {
        throw new DevflowError("Run does not exist", {
          class: "fatal",
          code: "internal_state_corruption",
          runId: input.runId,
          phaseId: input.phaseId,
        });
      }
      if (runRow.state !== "paused") {
        // Remember the state the run was paused from.
        const pausedFromState = runRow.state;
        const cause = `human_required:${gateKey}:${input.phaseId}:${attempt}`;
        await tx
          .update(runs)
          .set({ state: "paused", pausedFromState })
          .where(eq(runs.id, input.runId));
        await eventRepository.appendInTransaction(tx, {
          runId: input.runId,
          type: "run.paused",
          payload: { cause, pausedFromState },
          idempotencyKey: `run.paused:${input.runId}:${cause}`,
        });
      }

      if (sessionId !== undefined) {
        if (options.markSessionCrashed === true) {
          const [sessionRow] = await tx
            .select({ recoveryAttempts: tuiSessions.recoveryAttempts })
            .from(tuiSessions)
            .where(eq(tuiSessions.id, sessionId));
          const recoveryAttempts = (sessionRow?.recoveryAttempts ?? 0) + 1;
          await tx
            .update(tuiSessions)
            .set({ state: "CRASHED", recoveryAttempts })
            .where(eq(tuiSessions.id, sessionId));
          await eventRepository.appendInTransaction(tx, {
            runId: input.runId,
            phaseId: input.phaseId,
            type: "session.crashed",
            payload: { sessionId, roleId: input.roleId, recoveryAttempts },
            idempotencyKey: `session.crashed:${sessionId}:${recoveryAttempts}`,
          });
        }

        // The session always ends in FAILED_NEEDS_HUMAN, crashed or not.
        await tx
          .update(tuiSessions)
          .set({ state: "FAILED_NEEDS_HUMAN" })
          .where(eq(tuiSessions.id, sessionId));
        await eventRepository.appendInTransaction(tx, {
          runId: input.runId,
          phaseId: input.phaseId,
          type: "session.failed",
          payload: { sessionId, roleId: input.roleId },
          idempotencyKey: `session.failed:${sessionId}`,
        });
      }

      await appendHumanGateRequestedEventInTransaction(
        input,
        eventRepository,
        tx,
        gateRequest,
        gateKey,
      );
    });
  } catch (gateFailure) {
    await failPhaseAndRun(input, eventRepository, attempt, "approval_request_failed");
    if (sessionId !== undefined) {
      await markSessionFailedNeedsHuman(input, eventRepository, sessionId);
    }
    throw gateFailure;
  }
}

/**
 * Marks both the phase and the run as `failed` in one transaction and appends
 * the `phase.failed` and `run.failed` events.
 *
 * @param reason - Recorded in both event payloads.
 */
async function failPhaseAndRun(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  attempt: number,
  reason: string,
) {
  await input.db.transaction(async (tx) => {
    const endedAt = () => new Date();

    await tx
      .update(runPhases)
      .set({ state: "failed", endedAt: endedAt() })
      .where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId)));
    await eventRepository.appendInTransaction(tx, {
      runId: input.runId,
      phaseId: input.phaseId,
      type: "phase.failed",
      payload: { phaseKey: input.phaseKey, attempt, reason },
      idempotencyKey: `phase.failed:${input.phaseId}:${attempt}`,
    });

    await tx
      .update(runs)
      .set({ state: "failed", endedAt: endedAt() })
      .where(eq(runs.id, input.runId));
    await eventRepository.appendInTransaction(tx, {
      runId: input.runId,
      type: "run.failed",
      payload: { reason },
      idempotencyKey: `run.failed:${input.runId}`,
    });
  });
}
/**
 * Fails the phase and run, disposes the adapter session, and marks the session
 * row `FAILED_NEEDS_HUMAN`.
 *
 * The session is marked even when `adapter.dispose` rejects, so a failed
 * dispose cannot leave the row in a live state; the dispose error still
 * propagates to the caller afterwards.
 *
 * @param reason - Recorded in the `phase.failed` and `run.failed` events.
 * @param handle - Session to dispose and mark.
 */
async function failRunAndDisposeSession(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  attempt: number,
  reason: string,
  handle: { sessionId: string },
) {
  await failPhaseAndRun(input, eventRepository, attempt, reason);
  try {
    await input.adapter.dispose(handle);
  } finally {
    await markSessionFailedNeedsHuman(input, eventRepository, handle.sessionId);
  }
}

/**
 * Marks the phase and the run `completed` in one transaction, appending
 * `phase.completed`, `session.ready` and `run.completed`.
 *
 * The `session.ready` event is keyed by the session's current
 * `recoveryAttempts` (0 when the row is missing); the session row itself is
 * not updated here.
 */
async function completePhaseAndRun(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  attempt: number,
  sessionId: string,
) {
  await input.db.transaction(async (tx) => {
    await tx
      .update(runPhases)
      .set({ state: "completed", endedAt: new Date() })
      .where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId)));
    await eventRepository.appendInTransaction(tx, {
      runId: input.runId,
      phaseId: input.phaseId,
      type: "phase.completed",
      payload: { phaseKey: input.phaseKey, attempt },
      idempotencyKey: `phase.completed:${input.phaseId}:${attempt}`,
    });

    const [session] = await tx
      .select({ recoveryAttempts: tuiSessions.recoveryAttempts })
      .from(tuiSessions)
      .where(eq(tuiSessions.id, sessionId));
    const recoveryAttempts = session?.recoveryAttempts ?? 0;
    await eventRepository.appendInTransaction(tx, {
      runId: input.runId,
      phaseId: input.phaseId,
      type: "session.ready",
      payload: { sessionId, roleId: input.roleId, recoveryAttempts },
      idempotencyKey: `session.ready:${sessionId}:${recoveryAttempts}`,
    });

    await tx
      .update(runs)
      .set({ state: "completed", endedAt: new Date() })
      .where(eq(runs.id, input.runId));
    await eventRepository.appendInTransaction(tx, {
      runId: input.runId,
      type: "run.completed",
      payload: { phaseKey: input.phaseKey },
      idempotencyKey: `run.completed:${input.runId}`,
    });
  });
}

/**
 * Starts a fake-backend session through the adapter and persists it.
 *
 * One transaction inserts the session row as `CREATED`, appends
 * `session.created`, steps the row through `BOOTSTRAPPING` to `READY`, and
 * appends `session.ready` with `recoveryAttempts: 0`.
 *
 * On failure the adapter session (if it was started) is disposed, then either
 * a human gate is requested (when `shouldCreateHumanGate` accepts the error)
 * or the phase and run are failed with reason `session_start_failed`.
 *
 * @param attempt - Phase attempt the session belongs to; used in failure events.
 * @returns The handle returned by the adapter.
 * @throws The human-required recovery error for gate-worthy failures,
 *   otherwise the original error.
 */
async function startSessionAndRecord(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  attempt: number,
): Promise<SessionHandle> {
  let handle: SessionHandle | undefined;
  try {
    handle = await input.adapter.start({
      runId: input.runId,
      roleId: input.roleId,
      backend: "fake",
      cwd: input.worktreeRoot,
      expectedArtifactPath: input.expectedArtifactPath,
      expectedSchema: input.expectedSchema,
    });
    const startedHandle = handle;
    await input.db.transaction(async (tx) => {
      await tx.insert(tuiSessions).values({
        id: startedHandle.sessionId,
        runId: input.runId,
        roleId: input.roleId,
        backend: "fake",
        cwd: input.worktreeRoot,
        expectedArtifactPath: input.expectedArtifactPath,
        expectedSchema: input.expectedSchema,
        state: "CREATED",
      });
      await eventRepository.appendInTransaction(tx, {
        runId: input.runId,
        phaseId: input.phaseId,
        type: "session.created",
        payload: { sessionId: startedHandle.sessionId, roleId: input.roleId, backend: "fake" },
        idempotencyKey: `session.created:${startedHandle.sessionId}`,
      });
      await tx
        .update(tuiSessions)
        .set({ state: "BOOTSTRAPPING" })
        .where(eq(tuiSessions.id, startedHandle.sessionId));
      await tx
        .update(tuiSessions)
        .set({ state: "READY" })
        .where(eq(tuiSessions.id, startedHandle.sessionId));
      await eventRepository.appendInTransaction(tx, {
        runId: input.runId,
        phaseId: input.phaseId,
        type: "session.ready",
        payload: { sessionId: startedHandle.sessionId, roleId: input.roleId, recoveryAttempts: 0 },
        idempotencyKey: `session.ready:${startedHandle.sessionId}:0`,
      });
    });
    return startedHandle;
  } catch (error) {
    if (handle !== undefined) {
      await input.adapter.dispose(handle);
    }

    // Nothing after the transaction can throw, so reaching this handler means
    // the transaction did not complete. No session id is passed on and the
    // session row is not marked failed; the removed `sessionRowPersisted` flag
    // could never be true here. NOTE(review): assumes `db.transaction` rolls
    // the insert back when it throws - confirm against the driver.
    if (shouldCreateHumanGate(error)) {
      const gateError = toHumanRequiredRecoveryError(error);
      await failPhaseAndRequestGate(
        input,
        eventRepository,
        attempt,
        "session_start_failed",
        gateError.code,
        { errorCode: error.code, recoveryHint: gateError.recoveryHint },
      );
      throw gateError;
    }

    await failPhaseAndRun(input, eventRepository, attempt, "session_start_failed");
    throw error;
  }
}
/** Writes `state` onto the phase row of `input.phaseId` / `input.runId`; appends no event. */
async function setPhaseState(
  input: RunSingleFakePhaseInput,
  state: "running" | "awaiting_artifact" | "validating" | "completed" | "failed",
) {
  const thisPhase = and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId));
  await input.db.update(runPhases).set({ state }).where(thisPhase);
}

/**
 * Builds the prompt envelope for one attempt.
 *
 * `dedupKey` is the hash of every envelope field except `uuid`, so two
 * envelopes for the same attempt and instructions share a key while each gets
 * its own `uuid` (from `input.uuidFactory` when provided, else `randomUUID`).
 */
function buildEnvelope(
  input: RunSingleFakePhaseInput,
  attempt: number,
  instructions: string,
): PromptEnvelope {
  const { runId, roleId, phaseKey, expectedSchema } = input;
  const expectedArtifact = input.expectedArtifactPath;

  // Property order is kept as in the original hashed object.
  const hashedFields = {
    runId,
    roleId,
    phaseKey,
    expectedArtifact,
    expectedSchema,
    instructions,
    attempt,
  };

  // The uuid is drawn before hashing, as before.
  const uuid = input.uuidFactory?.() ?? randomUUID();
  const dedupKey = hash(hashedFields);

  return {
    uuid,
    runId,
    roleId,
    phaseKey,
    attempt,
    expectedArtifact,
    expectedSchema,
    dedupKey,
    instructions,
  };
}

/** Result of sending a prompt. */
interface PromptSendRecord {
  promptId: string;
  /** Signature of the artifact path taken just before sending; `undefined` when not captured. */
  artifactBaselineSignature: string | undefined;
}

/** Options for `sendPromptAndRecord`. */
interface SendPromptAndRecordOptions {
  /** Pass `false` to skip taking the pre-send artifact signature. */
  captureArtifactBaseline?: boolean;
}

/**
 * Sends `envelope` to the session and records the exchange.
 *
 * Order of effects: the session row is set to `BUSY` with the prompt hash and
 * timestamp, `session.busy` is appended, the artifact baseline signature is
 * taken (unless disabled), the prompt is sent with retries, and finally an
 * event of `type` is appended.
 *
 * @param type - Event type to record once the send succeeded.
 * @returns The adapter's prompt id and the baseline signature.
 */
async function sendPromptAndRecord(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  handle: { sessionId: string },
  envelope: PromptEnvelope,
  type: "prompt.sent" | "prompt.repaired",
  options: SendPromptAndRecordOptions = {},
): Promise<PromptSendRecord> {
  const { sessionId } = handle;
  const { dedupKey } = envelope;

  await input.db
    .update(tuiSessions)
    .set({
      state: "BUSY",
      lastPromptHash: dedupKey,
      lastPromptAt: new Date(),
    })
    .where(eq(tuiSessions.id, sessionId));
  await eventRepository.append({
    runId: input.runId,
    phaseId: input.phaseId,
    type: "session.busy",
    payload: { sessionId, roleId: input.roleId, dedupKey },
    idempotencyKey: `session.busy:${sessionId}:${dedupKey}`,
  });

  let artifactBaselineSignature: string | undefined;
  if (options.captureArtifactBaseline !== false) {
    artifactBaselineSignature = await artifactSignature(input.expectedArtifactPath);
  }

  const { promptId } = await sendPromptWithRetry(input.adapter, handle, envelope);
  await eventRepository.append({
    runId: input.runId,
    phaseId: input.phaseId,
    type,
    payload: { roleId: input.roleId, dedupKey },
    idempotencyKey: `${type}:${dedupKey}`,
  });

  return { promptId, artifactBaselineSignature };
}
/**
 * Sends a prompt through the adapter, re-sending after recoverable errors.
 *
 * At most `sendPromptRetryBudget + 1` physical sends are made. Any error that
 * is not a `DevflowError` of class `recoverable` is rethrown immediately; once
 * the budget is used up the last recoverable error is thrown.
 */
async function sendPromptWithRetry(
  adapter: SessionAdapter,
  handle: { sessionId: string },
  envelope: PromptEnvelope,
): Promise<{ promptId: string }> {
  let mostRecentFailure: unknown;
  for (let retriesUsed = 0; retriesUsed <= sendPromptRetryBudget; retriesUsed += 1) {
    try {
      return await adapter.sendPrompt(handle, envelope);
    } catch (sendFailure) {
      const retryable = sendFailure instanceof DevflowError && sendFailure.class === "recoverable";
      if (!retryable) {
        throw sendFailure;
      }
      mostRecentFailure = sendFailure;
    }
  }

  throw mostRecentFailure;
}

/** Outcome of validating the artifact produced by one attempt. */
interface ArtifactOutcome {
  attempt: number;
  artifact: { id: string };
  artifactHash: string;
  validation: ReturnType<typeof validateArtifact>;
}

/** Columns of an artifact row compared when an insert hits an existing row. */
interface ArtifactRecord {
  id: string;
  phaseId: string | null;
  schemaId: string;
  valid: boolean;
  validationError: unknown;
}

/**
 * Waits for the expected artifact to appear and then validates it.
 *
 * Appends `artifact.expected`, moves the phase to `awaiting_artifact`, and
 * waits using `input.wait` (ignoring `artifactBaselineSignature` when given).
 * On `artifact_timeout_exhausted` it appends `artifact.timeout`, sets the
 * session to `ARTIFACT_TIMEOUT`, and rethrows; other wait errors are rethrown
 * untouched. On success the phase moves to `validating`.
 *
 * @returns The validation outcome for `attempt`.
 */
async function waitForAndValidateArtifact(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  attempt: number,
  sessionId: string,
  artifactBaselineSignature: string | undefined,
): Promise<ArtifactOutcome> {
  const expectation = {
    path: input.expectedArtifactPath,
    schemaId: input.expectedSchema,
    attempt,
  };
  await eventRepository.append({
    runId: input.runId,
    phaseId: input.phaseId,
    type: "artifact.expected",
    payload: expectation,
    idempotencyKey: `artifact.expected:${input.phaseId}:${attempt}:${input.expectedArtifactPath}`,
  });

  await setPhaseState(input, "awaiting_artifact");
  try {
    const waitOptions: ArtifactWaitOptions =
      artifactBaselineSignature === undefined
        ? { ...input.wait }
        : { ...input.wait, ignoreInitialSignature: artifactBaselineSignature };
    await waitForArtifact(input.expectedArtifactPath, waitOptions);
  } catch (waitFailure) {
    if (isDevflowErrorWithCode(waitFailure, "artifact_timeout_exhausted")) {
      await eventRepository.append({
        runId: input.runId,
        phaseId: input.phaseId,
        type: "artifact.timeout",
        payload: {
          path: input.expectedArtifactPath,
          schemaId: input.expectedSchema,
          attempt,
        },
        idempotencyKey: `artifact.timeout:${input.phaseId}:${attempt}:${input.expectedArtifactPath}`,
      });
      await input.db
        .update(tuiSessions)
        .set({ state: "ARTIFACT_TIMEOUT" })
        .where(eq(tuiSessions.id, sessionId));
    }
    throw waitFailure;
  }

  await setPhaseState(input, "validating");
  return validateCurrentArtifact(input, eventRepository, attempt);
}

/**
 * Reads the artifact, hashes it (SHA-256, hex), validates it against
 * `input.expectedSchema`, persists an artifact row, and appends
 * `artifact.validated` or `artifact.invalid`.
 *
 * @throws DevflowError (`fatal`, `internal_state_corruption`) when no artifact
 *   row could be inserted or found.
 */
async function validateCurrentArtifact(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  attempt: number,
): Promise<ArtifactOutcome> {
  const rawBytes = await readArtifactBytes(input);
  const artifactHash = createHash("sha256").update(rawBytes).digest("hex");
  const validation = validateArtifact(input.expectedSchema, parseArtifactJson(rawBytes));

  const artifact = await insertArtifactRecord(input, artifactHash, validation);
  if (artifact === undefined) {
    throw new DevflowError("Artifact insert returned no row", {
      class: "fatal",
      code: "internal_state_corruption",
      runId: input.runId,
      phaseId: input.phaseId,
    });
  }

  const eventType = validation.ok ? "artifact.validated" : "artifact.invalid";
  const describedArtifact = {
    artifactId: artifact.id,
    hash: artifactHash,
    path: input.expectedArtifactPath,
    schemaId: input.expectedSchema,
  };
  await eventRepository.append({
    runId: input.runId,
    phaseId: input.phaseId,
    type: eventType,
    payload: validation.ok
      ? describedArtifact
      : { ...describedArtifact, errors: validation.errors },
    idempotencyKey: `${eventType}:${input.phaseId}:${input.expectedArtifactPath}:${artifactHash}`,
  });

  return { attempt, artifact, artifactHash, validation };
}
/**
 * Inserts an artifact row for the current validation, or reuses the existing
 * row with the same run, path and hash.
 *
 * Whichever row is found is checked against the current phase, schema and
 * verdict by `assertArtifactReplayMatches`.
 *
 * @returns The row id, or `undefined` when neither the insert nor the lookup
 *   produced a row.
 */
async function insertArtifactRecord(
  input: RunSingleFakePhaseInput,
  artifactHash: string,
  validation: ReturnType<typeof validateArtifact>,
): Promise<{ id: string } | undefined> {
  const validationError = validation.ok ? null : { errors: validation.errors };
  const comparedColumns = {
    id: artifacts.id,
    phaseId: artifacts.phaseId,
    schemaId: artifacts.schemaId,
    valid: artifacts.valid,
    validationError: artifacts.validationError,
  };

  const [freshRow] = await input.db
    .insert(artifacts)
    .values({
      runId: input.runId,
      phaseId: input.phaseId,
      path: input.expectedArtifactPath,
      schemaId: input.expectedSchema,
      hash: artifactHash,
      valid: validation.ok,
      validationError,
    })
    .onConflictDoNothing({ target: [artifacts.runId, artifacts.path, artifacts.hash] })
    .returning(comparedColumns);

  let artifact = freshRow;
  if (artifact === undefined || artifact === null) {
    // The insert was skipped by the conflict rule: read the existing row.
    const existingRows = await input.db
      .select(comparedColumns)
      .from(artifacts)
      .where(
        and(
          eq(artifacts.runId, input.runId),
          eq(artifacts.path, input.expectedArtifactPath),
          eq(artifacts.hash, artifactHash),
        ),
      )
      .limit(1);
    artifact = existingRows[0];
  }

  if (artifact === undefined) {
    return undefined;
  }
  assertArtifactReplayMatches(input, validation.ok, validationError, artifact);
  return { id: artifact.id };
}

/**
 * Verifies that a stored artifact row agrees with the current validation:
 * same phase, same schema, same validity, and canonically equal validation
 * error.
 *
 * @throws DevflowError (`fatal`, `internal_state_corruption`) on any mismatch.
 */
function assertArtifactReplayMatches(
  input: RunSingleFakePhaseInput,
  valid: boolean,
  validationError: unknown,
  artifact: ArtifactRecord,
) {
  // All four comparisons are evaluated up front, as in the original.
  const agreements = [
    artifact.phaseId === input.phaseId,
    artifact.schemaId === input.expectedSchema,
    artifact.valid === valid,
    canonicalize(artifact.validationError) === canonicalize(validationError),
  ];
  if (agreements.every((agrees) => agrees)) {
    return;
  }

  throw new DevflowError("Artifact replay does not match current validation context", {
    class: "fatal",
    code: "internal_state_corruption",
    runId: input.runId,
    phaseId: input.phaseId,
  });
}

/**
 * Sets the session row back to `READY` and appends `session.idle`, keyed by
 * the session id and the dedup key of the prompt that finished.
 */
async function markSessionIdle(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  sessionId: string,
  promptDedupKey: string,
) {
  await input.db
    .update(tuiSessions)
    .set({ state: "READY" })
    .where(eq(tuiSessions.id, sessionId));

  const idleKey = `session.idle:${sessionId}:${promptDedupKey}`;
  await eventRepository.append({
    runId: input.runId,
    phaseId: input.phaseId,
    type: "session.idle",
    payload: { sessionId, roleId: input.roleId, dedupKey: promptDedupKey },
    idempotencyKey: idleKey,
  });
}

/**
 * Appends a `session.ready` event carrying the session's current
 * `recoveryAttempts` (0 when the row is missing). The session row is only
 * read, not updated.
 */
async function markSessionReady(
  input: RunSingleFakePhaseInput,
  eventRepository: RunEventRepository,
  sessionId: string,
) {
  const sessionRows = await input.db
    .select({ recoveryAttempts: tuiSessions.recoveryAttempts })
    .from(tuiSessions)
    .where(eq(tuiSessions.id, sessionId));
  const recoveryAttempts = sessionRows[0]?.recoveryAttempts ?? 0;

  await eventRepository.append({
    runId: input.runId,
    phaseId: input.phaseId,
    type: "session.ready",
    payload: { sessionId, roleId: input.roleId, recoveryAttempts },
    idempotencyKey: `session.ready:${sessionId}:${recoveryAttempts}`,
  });
}
0) + 1; + await input.db + .update(tuiSessions) + .set({ state: "READY", recoveryAttempts }) + .where(eq(tuiSessions.id, sessionId)); + await eventRepository.append({ + runId: input.runId, + phaseId: input.phaseId, + type: "session.recovered", + payload: { sessionId, roleId: input.roleId, recoveryAttempts }, + idempotencyKey: `session.recovered:${sessionId}:${recoveryAttempts}`, + }); + return true; +} + +async function probeWithTypedError( + adapter: SessionAdapter, + handle: { sessionId: string }, +): ReturnType { + try { + return await adapter.probe(handle); + } catch (error) { + if (error instanceof DevflowError) { + throw error; + } + throw new DevflowError("Unclassified probe failure", { + class: "fatal", + code: "internal_state_corruption", + cause: error, + }); + } +} + +async function rebootstrapWithRetry( + adapter: SessionAdapter, + handle: { sessionId: string }, +): Promise { + for (let attemptsRemaining = 2; attemptsRemaining > 0; attemptsRemaining -= 1) { + try { + await adapter.rebootstrap(handle); + return true; + } catch (error) { + if (!(error instanceof DevflowError)) { + throw new DevflowError("Unclassified rebootstrap failure", { + class: "fatal", + code: "internal_state_corruption", + cause: error, + }); + } + if (error.class !== "recoverable") { + throw error; + } + // Retry budget is intentionally one rebootstrap retry after the first failure. 
+ } + } + + return false; +} + +async function markSessionFailedNeedsHuman( + input: RunSingleFakePhaseInput, + eventRepository: RunEventRepository, + sessionId: string, +) { + await input.db + .update(tuiSessions) + .set({ state: "FAILED_NEEDS_HUMAN" }) + .where(eq(tuiSessions.id, sessionId)); + await eventRepository.append({ + runId: input.runId, + phaseId: input.phaseId, + type: "session.failed", + payload: { sessionId, roleId: input.roleId }, + idempotencyKey: `session.failed:${sessionId}`, + }); +} + +async function waitForArtifact(path: string, options: ArtifactWaitOptions = {}): Promise { + const timeoutMs = options.timeoutMs ?? 5_000; + const pollIntervalMs = options.pollIntervalMs ?? 25; + const stableMs = options.stableMs ?? 500; + const ignoreInitialSignature = options.ignoreInitialSignature; + const deadline = Date.now() + timeoutMs; + let lastSignature: string | undefined; + let stableSince: number | undefined; + + while (Date.now() <= deadline) { + try { + const signature = await artifactSignature(path); + if (signature === undefined || signature === ignoreInitialSignature) { + lastSignature = undefined; + stableSince = undefined; + await sleep(pollIntervalMs); + continue; + } + if (lastSignature === signature) { + stableSince ??= Date.now(); + if (Date.now() - stableSince >= stableMs) { + return; + } + } else { + lastSignature = signature; + stableSince = Date.now(); + if (stableMs === 0) { + return; + } + } + } catch (cause) { + if (cause instanceof DevflowError) { + throw cause; + } + if (isNodeError(cause) && cause.code === "ENOENT") { + lastSignature = undefined; + stableSince = undefined; + } else { + throw new DevflowError("Failed to stat expected artifact path", { + class: "fatal", + code: "workspace_permissions", + recoveryHint: path, + cause, + }); + } + } + await sleep(pollIntervalMs); + } + + throw new DevflowError("Timed out waiting for fake phase artifact", { + class: "human_required", + code: "artifact_timeout_exhausted", + 
recoveryHint: path, + }); +} + +async function artifactSignature(path: string): Promise { + try { + const artifactStat = await stat(path); + return `${artifactStat.size}:${artifactStat.mtimeMs}`; + } catch (cause) { + if (isNodeError(cause) && cause.code === "ENOENT") { + return undefined; + } + throw new DevflowError("Failed to stat expected artifact path", { + class: "fatal", + code: "workspace_permissions", + recoveryHint: path, + cause, + }); + } +} + +function parseArtifactJson(bytes: Buffer): unknown { + try { + return JSON.parse(bytes.toString("utf8")) as unknown; + } catch (cause) { + return { + __devflowParseError: "invalid_json", + message: cause instanceof Error ? cause.message : String(cause), + }; + } +} + +async function readArtifactBytes(input: RunSingleFakePhaseInput): Promise { + try { + return await readFile(input.expectedArtifactPath); + } catch (cause) { + throw new DevflowError("Failed to read expected artifact", { + class: "fatal", + code: "workspace_permissions", + runId: input.runId, + phaseId: input.phaseId, + recoveryHint: input.expectedArtifactPath, + cause, + }); + } +} + +function repairInstructionsFor(instructions: string): string { + const repairLine = "Repair the artifact so it conforms to the expected schema."; + const repairScenario = + /^Repair-Scenario:\s*([A-Za-z0-9_-]+)\s*$/m.exec(instructions)?.[1] ?? 
"ok"; + if (/^Scenario:\s*[A-Za-z0-9_-]+\s*$/m.test(instructions)) { + return instructions.replace( + /^Scenario:\s*[A-Za-z0-9_-]+\s*$/m, + `Scenario: ${repairScenario}\n${repairLine}`, + ); + } + + return `Scenario: ${repairScenario}\n${repairLine}\n${instructions}`; +} + +interface HumanGateRequest { + id: string; + idempotencyKey: string; +} + +async function ensureHumanGateRequestInTransaction( + input: RunSingleFakePhaseInput, + tx: TransactionDb, + gateKey: string, + attempt: number, + payload: Record, +): Promise { + const idempotencyKey = `${input.runId}:${gateKey}:${input.phaseId}:${attempt}`; + const storedPayload = stripUndefinedProperties(payload) as Record; + await tx + .insert(approvalRequests) + .values({ + runId: input.runId, + phaseId: input.phaseId, + gateKey, + state: "pending", + idempotencyKey, + payload: storedPayload, + }) + .onConflictDoNothing({ target: approvalRequests.idempotencyKey }); + const [request] = await tx + .select({ + gateKey: approvalRequests.gateKey, + id: approvalRequests.id, + payload: approvalRequests.payload, + phaseId: approvalRequests.phaseId, + runId: approvalRequests.runId, + }) + .from(approvalRequests) + .where(eq(approvalRequests.idempotencyKey, idempotencyKey)) + .limit(1); + + if (request === undefined) { + throw new DevflowError("Approval request insert returned no row", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + phaseId: input.phaseId, + }); + } + if ( + request.runId !== input.runId || + request.phaseId !== input.phaseId || + request.gateKey !== gateKey || + canonicalize(request.payload) !== canonicalize(storedPayload) + ) { + throw new DevflowError("Approval request idempotency replay does not match existing request", { + class: "fatal", + code: "internal_state_corruption", + runId: input.runId, + phaseId: input.phaseId, + }); + } + + return { id: request.id, idempotencyKey }; +} + +async function appendHumanGateRequestedEventInTransaction( + input: 
RunSingleFakePhaseInput, + eventRepository: RunEventRepository, + tx: TransactionDb, + request: HumanGateRequest, + gateKey: string, +) { + await eventRepository.appendInTransaction(tx, { + runId: input.runId, + phaseId: input.phaseId, + type: "approval.requested", + payload: { + approvalRequestId: request.id, + approvalIdempotencyKey: request.idempotencyKey, + gateKey, + }, + idempotencyKey: `approval.requested:${request.idempotencyKey}`, + }); +} + +function stripUndefinedProperties(value: unknown): unknown { + if (Array.isArray(value)) { + return value.map((item) => stripUndefinedProperties(item)); + } + if (value !== null && typeof value === "object") { + return Object.fromEntries( + Object.entries(value as Record) + .filter(([, child]) => child !== undefined) + .map(([key, child]) => [key, stripUndefinedProperties(child)]), + ); + } + + return value; +} + +function isDevflowErrorWithCode(error: unknown, code: string): error is DevflowError { + return error instanceof DevflowError && error.code === code; +} + +function shouldCreateHumanGate(error: unknown): error is DevflowError { + return error instanceof DevflowError && error.class !== "fatal"; +} + +function toHumanRequiredRecoveryError(error: DevflowError): DevflowError { + if (error.class === "human_required") { + return error; + } + + const options: ConstructorParameters[1] = { + class: "human_required", + code: "prompt_send_exhausted", + recoveryHint: error.recoveryHint ?? 
error.message, + cause: error, + }; + + if (error.runId !== undefined) { + options.runId = error.runId; + } + if (error.phaseId !== undefined) { + options.phaseId = error.phaseId; + } + + return new DevflowError("Recoverable session error exhausted retry budget", { + ...options, + }); +} + +function toArtifactTimeoutRecoveryGateError(error: DevflowError): DevflowError { + if (error.class === "human_required") { + return error; + } + + const options: ConstructorParameters[1] = { + class: "human_required", + code: "artifact_timeout_exhausted", + recoveryHint: error.recoveryHint ?? error.message, + cause: error, + }; + if (error.runId !== undefined) { + options.runId = error.runId; + } + if (error.phaseId !== undefined) { + options.phaseId = error.phaseId; + } + + return new DevflowError("Artifact timeout recovery exhausted retry budget", options); +} + +async function removeStaleArtifact(input: RunSingleFakePhaseInput): Promise { + try { + await unlink(input.expectedArtifactPath); + } catch (cause) { + if (isNodeError(cause) && cause.code === "ENOENT") { + return; + } + throw new DevflowError("Failed to remove stale artifact before waiting", { + class: "fatal", + code: "workspace_permissions", + runId: input.runId, + phaseId: input.phaseId, + recoveryHint: input.expectedArtifactPath, + cause, + }); + } +} + +async function captureTranscript(input: RunSingleFakePhaseInput, handle: { sessionId: string }) { + const sink = input.transcriptSink ?? 
new TuiTranscriptRepository(input.db); + const [session] = await input.db + .select({ lastCaptureSeq: tuiSessions.lastCaptureSeq }) + .from(tuiSessions) + .where(eq(tuiSessions.id, handle.sessionId)); + if (session === undefined) { + throw new DevflowError("TUI session does not exist for transcript capture", { + class: "fatal", + code: "session_not_found", + runId: input.runId, + phaseId: input.phaseId, + }); + } + return captureAndPersistTranscript({ + adapter: input.adapter, + handle, + fromSeq: session.lastCaptureSeq, + sink, + }); +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function isNodeError(error: unknown): error is NodeJS.ErrnoException { + return error instanceof Error && "code" in error; +} diff --git a/packages/run-engine/src/index.ts b/packages/run-engine/src/index.ts new file mode 100644 index 0000000..930938d --- /dev/null +++ b/packages/run-engine/src/index.ts @@ -0,0 +1,2 @@ +export * from "./fake-phase-harness.js"; +export * from "./run-event-repository.js"; diff --git a/packages/run-engine/src/run-event-repository.ts b/packages/run-engine/src/run-event-repository.ts new file mode 100644 index 0000000..033b7ea --- /dev/null +++ b/packages/run-engine/src/run-event-repository.ts @@ -0,0 +1 @@ +export { RunEventRepository } from "@devflow/db"; diff --git a/packages/run-engine/tsconfig.build.json b/packages/run-engine/tsconfig.build.json new file mode 100644 index 0000000..28a70ef --- /dev/null +++ b/packages/run-engine/tsconfig.build.json @@ -0,0 +1,10 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "composite": false, + "emitDeclarationOnly": true, + "noEmit": false + }, + "references": [], + "exclude": ["src/**/*.test.ts"] +} diff --git a/packages/run-engine/tsconfig.json b/packages/run-engine/tsconfig.json new file mode 100644 index 0000000..c6bd1bd --- /dev/null +++ b/packages/run-engine/tsconfig.json @@ -0,0 +1,10 @@ +{ + "extends": "../../tsconfig.base.json", + 
"compilerOptions": { + "rootDir": "src", + "outDir": "dist", + "types": ["node", "vitest"] + }, + "include": ["src/**/*.ts"], + "references": [{ "path": "../core" }, { "path": "../db" }, { "path": "../session" }] +} diff --git a/packages/session/package.json b/packages/session/package.json index f29fc96..85c00fb 100644 --- a/packages/session/package.json +++ b/packages/session/package.json @@ -9,7 +9,7 @@ "scripts": { "build": "tsup src/index.ts --format esm,cjs --clean && tsc -p tsconfig.build.json", "typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit", - "test": "vitest run" + "test": "cd ../.. && vitest run --project packages/session" }, "dependencies": { "@devflow/core": "workspace:*" diff --git a/packages/session/src/fake.test.ts b/packages/session/src/fake.test.ts index 7385cb5..5fd1165 100644 --- a/packages/session/src/fake.test.ts +++ b/packages/session/src/fake.test.ts @@ -97,7 +97,25 @@ describe("FakeSessionAdapter", () => { expect(chunks.every((chunk, index) => chunk.seq === BigInt(index + 1))).toBe(true); }); - it("refuses duplicate prompt dedup keys for the same session", async () => { + it("classifies unsupported backend as human-required backend_unavailable", async () => { + const fixtureRoot = makeFixtureRoot(); + tempRoots.push(fixtureRoot); + const adapter = new FakeSessionAdapter({ fixtureRoot, writeDelayMs: 0 }); + + await expect( + adapter.start({ + runId, + roleId: "implementer", + backend: "codex", + cwd: fixtureRoot, + }), + ).rejects.toMatchObject({ + class: "human_required", + code: "backend_unavailable", + }); + }); + + it("treats duplicate prompt dedup keys as idempotent success without reprocessing", async () => { const fixtureRoot = makeFixtureRoot(); tempRoots.push(fixtureRoot); const adapter = new FakeSessionAdapter({ fixtureRoot, writeDelayMs: 0 }); @@ -116,13 +134,12 @@ describe("FakeSessionAdapter", () => { await adapter.sendPrompt(handle, first); - await expect(adapter.sendPrompt(handle, 
duplicate)).rejects.toMatchObject({ - code: "duplicate_prompt_dedup_key", - }); + await expect(adapter.sendPrompt(handle, duplicate)).resolves.toEqual({ promptId: dedupKey }); await waitForFile(first.expectedArtifact); + expect(existsSync(duplicate.expectedArtifact)).toBe(false); }); - it("preserves prompt dedup history across crash and rebootstrap recovery", async () => { + it("records dedup history only after a fake prompt is accepted", async () => { const fixtureRoot = makeFixtureRoot(); tempRoots.push(fixtureRoot); const adapter = new FakeSessionAdapter({ fixtureRoot, writeDelayMs: 0 }); @@ -141,7 +158,7 @@ describe("FakeSessionAdapter", () => { code: "prompt_send_transient", }); await expect(adapter.sendPrompt(handle, crash)).rejects.toMatchObject({ - code: "duplicate_prompt_dedup_key", + code: "prompt_send_transient", }); const ok = envelope({ dedupKey: "d".repeat(64) }); @@ -150,9 +167,7 @@ describe("FakeSessionAdapter", () => { await waitForFile(ok.expectedArtifact); await adapter.rebootstrap(handle); - await expect(adapter.sendPrompt(handle, ok)).rejects.toMatchObject({ - code: "duplicate_prompt_dedup_key", - }); + await expect(adapter.sendPrompt(handle, ok)).resolves.toEqual({ promptId: "d".repeat(64) }); }); it("rejects prompts whose run or role do not match the session", async () => { @@ -207,6 +222,26 @@ describe("FakeSessionAdapter", () => { expect(existsSync(prompt.expectedArtifact)).toBe(false); }); + it("does not record dedup history when fixture resolution fails", async () => { + const fixtureRoot = mkdtempSync(join(tmpdir(), "devflow-empty-fake-fixtures-")); + tempRoots.push(fixtureRoot); + const adapter = new FakeSessionAdapter({ fixtureRoot, writeDelayMs: 0 }); + const handle = await adapter.start({ + runId, + roleId: "implementer", + backend: "fake", + cwd: fixtureRoot, + }); + const missing = envelope({ dedupKey: "f".repeat(64) }); + + await expect(adapter.sendPrompt(handle, missing)).rejects.toMatchObject({ + code: "fake_fixture_missing", + 
}); + await expect(adapter.sendPrompt(handle, missing)).rejects.toMatchObject({ + code: "fake_fixture_missing", + }); + }); + it("supports invalid, timeout, and crash sentinel scenarios", async () => { const fixtureRoot = makeFixtureRoot(); tempRoots.push(fixtureRoot); diff --git a/packages/session/src/fake.ts b/packages/session/src/fake.ts index fe9b2dc..2701191 100644 --- a/packages/session/src/fake.ts +++ b/packages/session/src/fake.ts @@ -21,7 +21,7 @@ import type { export interface FakeSessionAdapterOptions { fixtureRoot?: string; - writeDelayMs?: number; + writeDelayMs?: number | ((envelope: PromptEnvelope) => number); sessionIdFactory?: () => string; now?: () => Date; } @@ -40,14 +40,16 @@ interface FakeSessionRecord { export class FakeSessionAdapter implements SessionAdapter { private readonly fixtureRoot: string; - private readonly writeDelayMs: number; + private readonly writeDelayMs: (envelope: PromptEnvelope) => number; private readonly sessionIdFactory: () => string; private readonly now: () => Date; private readonly sessions = new Map(); constructor(options: FakeSessionAdapterOptions = {}) { this.fixtureRoot = options.fixtureRoot ?? defaultFixtureRoot(); - this.writeDelayMs = options.writeDelayMs ?? 50; + const writeDelayMs = options.writeDelayMs; + this.writeDelayMs = + typeof writeDelayMs === "function" ? writeDelayMs : () => writeDelayMs ?? 50; this.sessionIdFactory = options.sessionIdFactory ?? randomUUID; this.now = options.now ?? 
(() => new Date()); } @@ -55,7 +57,7 @@ export class FakeSessionAdapter implements SessionAdapter { async start(input: StartInput): Promise { if (input.backend !== "fake") { throw new DevflowError("FakeSessionAdapter only supports the fake backend", { - class: "fatal", + class: "human_required", code: "backend_unavailable", runId: input.runId, }); @@ -87,15 +89,10 @@ export class FakeSessionAdapter implements SessionAdapter { }); } if (record.sentDedupKeys.has(envelope.dedupKey)) { - throw new DevflowError("Duplicate prompt dedup key refused by fake session", { - class: "recoverable", - code: "duplicate_prompt_dedup_key", - runId: record.runId, - }); + return { promptId: envelope.dedupKey }; } const scenarioName = scenarioFromInstructions(envelope.instructions); - record.sentDedupKeys.add(envelope.dedupKey); if (scenarioName === "crash") { this.appendTranscript(record, `[fake] received prompt ${envelope.uuid}; crashing`); @@ -107,6 +104,7 @@ export class FakeSessionAdapter implements SessionAdapter { } if (scenarioName === "timeout") { + record.sentDedupKeys.add(envelope.dedupKey); this.appendTranscript(record, `[fake] received prompt ${envelope.uuid}; timeout`); return { promptId: envelope.dedupKey }; } @@ -121,9 +119,12 @@ export class FakeSessionAdapter implements SessionAdapter { envelope.runId, ); + record.sentDedupKeys.add(envelope.dedupKey); + + const writeDelayMs = this.writeDelayMs(envelope); this.appendTranscript( record, - `[fake] received prompt ${envelope.uuid}; will write ${envelope.expectedArtifact} in ${this.writeDelayMs}ms`, + `[fake] received prompt ${envelope.uuid}; will write ${envelope.expectedArtifact} in ${writeDelayMs}ms`, ); const timer = setTimeout(() => { record.timers.delete(timer); @@ -145,7 +146,7 @@ export class FakeSessionAdapter implements SessionAdapter { return; } this.appendTranscript(record, `[fake] wrote artifact ${envelope.expectedArtifact}`); - }, this.writeDelayMs); + }, writeDelayMs); record.timers.add(timer); return { 
promptId: envelope.dedupKey }; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e89ef8e..91325d7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -100,6 +100,21 @@ importers: specifier: 8.20.0 version: 8.20.0 + packages/run-engine: + dependencies: + '@devflow/core': + specifier: workspace:* + version: link:../core + '@devflow/db': + specifier: workspace:* + version: link:../db + '@devflow/session': + specifier: workspace:* + version: link:../session + drizzle-orm: + specifier: 0.45.2 + version: 0.45.2(@types/pg@8.20.0)(pg@8.20.0) + packages/session: dependencies: '@devflow/core': diff --git a/tsconfig.json b/tsconfig.json index 84f04cd..7d87fab 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -8,6 +8,7 @@ "references": [ { "path": "./packages/core" }, { "path": "./packages/db" }, + { "path": "./packages/run-engine" }, { "path": "./packages/session" }, { "path": "./apps/cli" } ] diff --git a/tsconfig.typecheck.json b/tsconfig.typecheck.json index a15bf93..924216b 100644 --- a/tsconfig.typecheck.json +++ b/tsconfig.typecheck.json @@ -10,6 +10,7 @@ "paths": { "@devflow/core": ["packages/core/src/index.ts"], "@devflow/db": ["packages/db/src/index.ts"], + "@devflow/run-engine": ["packages/run-engine/src/index.ts"], "@devflow/session": ["packages/session/src/index.ts"] } }, diff --git a/vitest.workspace.ts b/vitest.workspace.ts index 04f2cd1..cf7cfee 100644 --- a/vitest.workspace.ts +++ b/vitest.workspace.ts @@ -1,39 +1,31 @@ +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; import { defineWorkspace } from "vitest/config"; +const root = fileURLToPath(new URL(".", import.meta.url)); +const alias = { + "@devflow/core": resolve(root, "packages/core/src/index.ts"), + "@devflow/db": resolve(root, "packages/db/src/index.ts"), + "@devflow/run-engine": resolve(root, "packages/run-engine/src/index.ts"), + "@devflow/session": resolve(root, "packages/session/src/index.ts"), +}; + +function nodeProject(name: string, include: string[]) { + return 
{ + resolve: { alias }, + test: { + name, + include, + environment: "node", + }, + }; +} + export default defineWorkspace([ - { - test: { - name: "root", - include: ["tests/**/*.test.ts"], - environment: "node", - }, - }, - { - test: { - name: "packages/db", - include: ["packages/db/src/**/*.test.ts"], - environment: "node", - }, - }, - { - test: { - name: "packages/core", - include: ["packages/core/src/**/*.test.ts"], - environment: "node", - }, - }, - { - test: { - name: "packages/session", - include: ["packages/session/src/**/*.test.ts"], - environment: "node", - }, - }, - { - test: { - name: "apps/cli", - include: ["apps/cli/src/**/*.test.ts"], - environment: "node", - }, - }, + nodeProject("root", ["tests/**/*.test.ts"]), + nodeProject("packages/db", ["packages/db/src/**/*.test.ts"]), + nodeProject("packages/core", ["packages/core/src/**/*.test.ts"]), + nodeProject("packages/session", ["packages/session/src/**/*.test.ts"]), + nodeProject("packages/run-engine", ["packages/run-engine/src/**/*.test.ts"]), + nodeProject("apps/cli", ["apps/cli/src/**/*.test.ts"]), ]);