feat: add fake phase harness

This commit is contained in:
chungyeong
2026-05-10 16:48:52 +09:00
parent be0ddb6e4e
commit 64efeabd33
22 changed files with 5766 additions and 76 deletions

View File

@@ -9,7 +9,7 @@
"scripts": { "scripts": {
"build": "tsup src/index.ts --format esm --clean", "build": "tsup src/index.ts --format esm --clean",
"typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit", "typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit",
"test": "vitest run" "test": "cd ../.. && vitest run --project apps/cli"
}, },
"dependencies": { "dependencies": {
"commander": "12.1.0", "commander": "12.1.0",

View File

@@ -1,4 +1,4 @@
# Devflow Implementation Plan v3 r4 # Devflow Implementation Plan v3 r9
## 0. Document Status ## 0. Document Status
@@ -11,6 +11,11 @@
- r2 applied CC-6 through CC-10. - r2 applied CC-6 through CC-10.
- r3 applied CC-11 through CC-15. - r3 applied CC-11 through CC-15.
- r4 applies CC-16 through CC-18. - r4 applies CC-16 through CC-18.
- r5 applies CC-19.
- r6 applies CC-20.
- r7 applies CC-21 through CC-23.
- r8 applies CC-24 through CC-26.
- r9 applies CC-27 through CC-28.
## 1. Stack Decisions ## 1. Stack Decisions
@@ -40,7 +45,7 @@
- Pre-commit: - Pre-commit:
- `lefthook`. - `lefthook`.
- Runs `biome check --write` on staged files. - Runs `biome check --write` on staged files.
- Runs `tsc -b --noEmit` on changed packages. - Runs `tsc -p tsconfig.typecheck.json --noEmit`.
- Runs related Vitest tests on changed packages. - Runs related Vitest tests on changed packages.
### 1.3 Database ### 1.3 Database
@@ -819,6 +824,7 @@ export interface TranscriptChunk {
### 8.3 Recovery Counters ### 8.3 Recovery Counters
- `sendPrompt` retry: 2. - `sendPrompt` retry: 2.
- Means one initial send plus two adapter-level retries, three physical send attempts max.
- `resume` retry: 2. - `resume` retry: 2.
- `rebootstrap` retry: 1. - `rebootstrap` retry: 1.
- artifact repair retry: 1. - artifact repair retry: 1.
@@ -882,7 +888,7 @@ const PromptEnvelope = z.object({
### 9.3 Rules ### 9.3 Rules
- Prompt identity is `dedupKey`. - Prompt identity is `dedupKey`.
- Adapter refuses duplicate `dedupKey` for the same session within a run lifetime. - Adapter treats duplicate `dedupKey` for the same session within a run lifetime as idempotent success and does not reprocess the prompt.
- `attempt` increments only when the engine intentionally re-sends after timeout or repair. - `attempt` increments only when the engine intentionally re-sends after timeout or repair.
- Adapter-level retry does not increment attempt. - Adapter-level retry does not increment attempt.
- Completion is never inferred from transcript text. - Completion is never inferred from transcript text.
@@ -1152,7 +1158,7 @@ Transitions:
| `awaiting_approval` | request_changes | `planning` | increment phase attempts | | `awaiting_approval` | request_changes | `planning` | increment phase attempts |
| `awaiting_approval` | timeout | `paused` | set `paused_from_state='awaiting_approval'` | | `awaiting_approval` | timeout | `paused` | set `paused_from_state='awaiting_approval'` |
| `executing` | phase ok, more phases | `executing` | next phase | | `executing` | phase ok, more phases | `executing` | next phase |
| `executing` | phase needs gate | `awaiting_approval` | request gate | | `executing` | normal workflow approval gate | `awaiting_approval` | request gate |
| `executing` | all phases done | `completed` | emit `run.completed`, write final report | | `executing` | all phases done | `completed` | emit `run.completed`, write final report |
| `executing` | unrecoverable error | `failed` | emit `run.failed` | | `executing` | unrecoverable error | `failed` | emit `run.failed` |
| `executing` | manual `pauseRun` | `paused` | set `paused_from_state='executing'` | | `executing` | manual `pauseRun` | `paused` | set `paused_from_state='executing'` |
@@ -1196,6 +1202,14 @@ Transitions:
| `awaiting_approval` | reject / abort | `failed` | | `awaiting_approval` | reject / abort | `failed` |
| `awaiting_approval` | request_changes | `running`, attempt + 1 | | `awaiting_approval` | request_changes | `running`, attempt + 1 |
Replay rules:
- `phase.started.payload.repair === true` marks that attempt as the single allowed repair attempt. Replaying that attempt MUST use the repair instructions and emit `prompt.repaired`, and MUST NOT start a third attempt.
- Repair replay from `running` may reuse an existing `READY` / bootstrapped session even if `last_prompt_hash` still contains the previous attempt's prompt hash; current-attempt prompt send has not happened yet.
- If phase state is `validating` and no artifact row exists yet, replay re-reads and validates the current `expectedArtifactPath` instead of treating the state as corruption.
- If phase state is `validating` and artifact rows already exist for the same phase/path/schema, replay may reuse only an artifact row created at or after the current session `last_prompt_at`; older rows are treated as stale previous-attempt outputs and the file is revalidated.
- Session bootstrap DB row/state changes and `session.created` / `session.ready` events are written in one DB transaction after adapter start succeeds.
## 14. Approval State ## 14. Approval State
States: States:
@@ -1463,6 +1477,7 @@ Human required:
- `artifact_invalid_after_repair` - `artifact_invalid_after_repair`
- `artifact_timeout_exhausted` - `artifact_timeout_exhausted`
- `prompt_send_exhausted`
- `destructive_command_blocked` - `destructive_command_blocked`
- `secret_access_blocked` - `secret_access_blocked`
- `backend_unavailable` - `backend_unavailable`
@@ -1486,7 +1501,7 @@ Fatal:
Mapping: Mapping:
- recoverable -> retry; exhausted -> human_required. - recoverable -> retry; exhausted -> human_required.
- human_required -> run paused and gate created. - human_required / recovery gate -> run paused and gate created. This is distinct from normal workflow approval gates in §13.1, which use `awaiting_approval`.
- fatal -> run failed, sessions disposed, final report best-effort. - fatal -> run failed, sessions disposed, final report best-effort.
## 19. Concurrent Runs and Crash Recovery ## 19. Concurrent Runs and Crash Recovery
@@ -1721,6 +1736,16 @@ M5+:
| CC-16 | Prompt hash used phaseId but envelope uses phaseKey | prompt hash uses phaseKey | | CC-16 | Prompt hash used phaseId but envelope uses phaseKey | prompt hash uses phaseKey |
| CC-17 | abortRun transition too narrow | abort from any non-terminal run state | | CC-17 | abortRun transition too narrow | abort from any non-terminal run state |
| CC-18 | approval pending transition wording conflicted with pause epoch | pending can transition once per pending epoch; paused may unpause to pending | | CC-18 | approval pending transition wording conflicted with pause epoch | pending can transition once per pending epoch; paused may unpause to pending |
| CC-19 | `tsc -b --noEmit` is brittle with TypeScript 5.6 project references on clean worktrees | build still uses `tsc -b`; no-emit verification uses root `tsconfig.typecheck.json` |
| CC-20 | `sendPrompt` retry count was ambiguous against Temporal activity attempts | §8.3 now states retry budget means initial attempt plus retries; §15.2 remains Temporal-level attempts only |
| CC-21 | Duplicate prompt dedup handling conflicted with adapter retry idempotency | duplicate `dedupKey` returns idempotent success without reprocessing |
| CC-22 | Normal workflow approval gates and human-required recovery gates were easy to conflate | §13.1 names normal workflow gates; §18 keeps human_required recovery gates paused |
| CC-23 | Phase start and event append could diverge under retry/error | phase start and `phase.started` append occur in one DB transaction |
| CC-24 | Repair attempt replay lost repair prompt identity and one-repair budget | repair attempts are derived from `phase.started.payload.repair`, replay uses repair instructions and `prompt.repaired`, and cannot start attempt 3 |
| CC-25 | `validating` replay failed if crash happened before artifact row insert | replay revalidates the expected artifact file when state is `validating` but no artifact row exists |
| CC-26 | Session bootstrap state/events could diverge | session row/state and `session.created` / `session.ready` events are committed in one DB transaction |
| CC-27 | `validating` replay could reuse stale previous-attempt artifact rows | artifact-row replay requires `artifact.created_at >= tui_sessions.last_prompt_at`; otherwise the file is revalidated |
| CC-28 | repair `running` replay rejected existing READY sessions with previous attempt prompt hash | current-attempt repair prompt is considered unsent, so replay may reuse the session and send `prompt.repaired` |
### Future Open Questions ### Future Open Questions

View File

@@ -8,8 +8,8 @@
"types": "./dist/index.d.ts", "types": "./dist/index.d.ts",
"scripts": { "scripts": {
"build": "tsup src/index.ts --format esm,cjs --clean && tsc -p tsconfig.build.json", "build": "tsup src/index.ts --format esm,cjs --clean && tsc -p tsconfig.build.json",
"typecheck": "tsc -b --noEmit", "typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit",
"test": "vitest run" "test": "cd ../.. && vitest run --project packages/core"
}, },
"dependencies": { "dependencies": {
"ajv": "8.17.1", "ajv": "8.17.1",

View File

@@ -1,9 +1,93 @@
import { describe, expect, it } from "vitest"; import { describe, expect, it } from "vitest";
import { RunEventPayloadSchemas, RunEventTypeValues } from "./run-event.js"; import { RunEvent, RunEventPayloadSchemas, RunEventTypeValues } from "./run-event.js";
describe("run events", () => { describe("run events", () => {
it("keeps a payload schema for every closed run event type", () => { it("keeps a payload schema for every closed run event type", () => {
expect(Object.keys(RunEventPayloadSchemas).sort()).toEqual([...RunEventTypeValues].sort()); expect(Object.keys(RunEventPayloadSchemas).sort()).toEqual([...RunEventTypeValues].sort());
}); });
it("rejects malformed payloads for structured event families", () => {
expect(
RunEventPayloadSchemas["prompt.sent"].safeParse({
roleId: "implementer",
}).success,
).toBe(false);
expect(
RunEventPayloadSchemas["artifact.validated"].safeParse({
artifactId: "not-a-uuid",
hash: "not-a-sha",
path: "/tmp/spec.json",
schemaId: "dev/spec@1",
}).success,
).toBe(false);
expect(
RunEventPayloadSchemas["run.paused"].safeParse({
cause: "human_required:artifact_repair_failed",
}).success,
).toBe(false);
expect(RunEventPayloadSchemas["run.resumed"].safeParse({}).success).toBe(false);
expect(
RunEventPayloadSchemas["approval.resolved"].safeParse({
action: "pause",
approvalRequestId: "00000000-0000-4000-8000-000000000000",
}).success,
).toBe(false);
expect(
RunEventPayloadSchemas["approval.resolved"].safeParse({
action: "approve",
approvalRequestId: "00000000-0000-4000-8000-000000000000",
}).success,
).toBe(true);
expect(
RunEventPayloadSchemas["session.ready"].safeParse({
roleId: "implementer",
sessionId: "00000000-0000-4000-8000-000000000000",
}).success,
).toBe(false);
expect(
RunEventPayloadSchemas["session.failed"].safeParse({
sessionId: "00000000-0000-4000-8000-000000000000",
}).success,
).toBe(false);
expect(
RunEventPayloadSchemas["phase.started"].safeParse({
attempt: 0,
phaseKey: "implement",
}).success,
).toBe(false);
expect(
RunEventPayloadSchemas["artifact.expected"].safeParse({
attempt: 0,
path: "/tmp/spec.json",
schemaId: "dev/spec@1",
}).success,
).toBe(false);
expect(RunEventPayloadSchemas["phase.skipped"].safeParse({}).success).toBe(false);
expect(
RunEventPayloadSchemas["review.batch_recorded"].safeParse({
attempt: 0,
reviewerRole: "reviewer",
}).success,
).toBe(false);
});
it("binds exported RunEvent validation to each event type payload schema", () => {
expect(
RunEvent.safeParse({
type: "session.ready",
payload: {},
}).success,
).toBe(false);
expect(
RunEvent.safeParse({
type: "session.ready",
payload: {
recoveryAttempts: 0,
roleId: "implementer",
sessionId: "00000000-0000-4000-8000-000000000000",
},
}).success,
).toBe(true);
});
}); });

View File

@@ -1,5 +1,7 @@
import { z } from "zod"; import { z } from "zod";
import { ApprovalDecisionAction } from "./enums.js";
export const RunEventTypeValues = [ export const RunEventTypeValues = [
"run.created", "run.created",
"run.started", "run.started",
@@ -40,15 +42,146 @@ export const RunEventTypeValues = [
export const RunEventType = z.enum(RunEventTypeValues); export const RunEventType = z.enum(RunEventTypeValues);
export type RunEventType = z.infer<typeof RunEventType>; export type RunEventType = z.infer<typeof RunEventType>;
const payloadSchema = z.record(z.unknown()); const uuid = z.string().uuid();
const sha256 = z.string().regex(/^[a-f0-9]{64}$/);
const nonEmptyString = z.string().min(1);
const phaseAttempt = z.number().int().positive();
export const RunEventPayloadSchemas = Object.freeze( const looseObject = z.object({}).passthrough();
Object.fromEntries(RunEventTypeValues.map((type) => [type, payloadSchema])), const phasePayload = z
) as Readonly<Record<RunEventType, typeof payloadSchema>>; .object({
phaseKey: nonEmptyString,
export const RunEvent = z.object({ attempt: phaseAttempt,
type: RunEventType, })
payload: payloadSchema, .passthrough();
// Payload for prompt.* events: a non-empty role id plus the prompt's dedup key
// (64 lowercase hex characters). Unknown keys are passed through.
const promptPayload = z
.object({
roleId: nonEmptyString,
dedupKey: sha256,
})
.passthrough();
// Payload for events about an artifact that is being waited on: where it is
// expected, which schema it must satisfy, and the positive phase attempt number.
const artifactWaitPayload = z
.object({
path: nonEmptyString,
schemaId: nonEmptyString,
attempt: phaseAttempt,
})
.passthrough();
// Payload for events about an artifact that has been checked: the artifact's
// UUID, its sha256 hash, its path and the schema id it was checked against.
const artifactValidationPayload = z
.object({
artifactId: uuid,
hash: sha256,
path: nonEmptyString,
schemaId: nonEmptyString,
})
.passthrough();
// Fields shared by every session.* payload: the session UUID and its role id.
const sessionBasePayload = z
.object({
sessionId: uuid,
roleId: nonEmptyString,
})
.passthrough();
const sessionPromptPayload = sessionBasePayload.extend({
dedupKey: sha256,
}); });
// Session payload that additionally carries a non-negative integer recovery counter.
const sessionRecoveryPayload = sessionBasePayload.extend({
recoveryAttempts: z.number().int().nonnegative(),
});
// Payload for approval.requested: the request UUID, the approval idempotency key
// and the gate key. Unknown keys are passed through.
const approvalRequestedPayload = z
.object({
approvalRequestId: uuid,
approvalIdempotencyKey: nonEmptyString,
gateKey: nonEmptyString,
})
.passthrough();
// Payload for approval.resolved: the decision (one of the ApprovalDecisionAction
// values) and the UUID of the request it resolves.
const approvalResolvedPayload = z
.object({
action: ApprovalDecisionAction,
approvalRequestId: uuid,
})
.passthrough();
// Payload for command.* events, identified by the command UUID.
const commandPayload = z
.object({
commandId: uuid,
})
.passthrough();
// Payload for finding.verifier_resolved, identified by the finding UUID.
const findingVerifierResolvedPayload = z
.object({
findingId: uuid,
})
.passthrough();
// Payload for backtest.* events, identified by the iteration UUID.
const backtestIterationPayload = z
.object({
iterationId: uuid,
})
.passthrough();
// Payload for review.batch_recorded: the reviewer role and the positive attempt number.
const reviewBatchRecordedPayload = z
.object({
reviewerRole: nonEmptyString,
attempt: phaseAttempt,
})
.passthrough();
// Payload for run.paused: why the run paused and which state it paused from.
// Both are only required to be non-empty strings here.
const runPausedPayload = z
.object({
cause: nonEmptyString,
pausedFromState: nonEmptyString,
})
.passthrough();
/**
 * Maps each run event type to the zod schema used to validate that event's payload.
 *
 * The literal is checked with `satisfies` against `Record<RunEventType, z.ZodTypeAny>`
 * and then exposed as a readonly record of generic zod schemas, so callers see
 * `z.ZodTypeAny` rather than the precise per-event schema type.
 * `Object.freeze` is shallow: it freezes this map, not the schema objects inside it.
 */
export const RunEventPayloadSchemas = Object.freeze({
// Run lifecycle. Only pause/resume carry required fields; the rest accept any object.
"run.created": looseObject,
"run.started": looseObject,
"run.paused": runPausedPayload,
// run.resumed requires only `cause`; `pausedFromState` is not required here.
"run.resumed": runPausedPayload.pick({ cause: true }).passthrough(),
"run.completed": looseObject,
"run.failed": looseObject,
"run.aborted": looseObject,
// Phase lifecycle.
"phase.started": phasePayload,
"phase.completed": phasePayload,
"phase.failed": phasePayload.extend({ reason: nonEmptyString.optional() }),
"phase.skipped": phasePayload,
// Prompt delivery.
"prompt.sent": promptPayload,
"prompt.repaired": promptPayload,
// Artifact waiting and validation.
"artifact.expected": artifactWaitPayload,
"artifact.validated": artifactValidationPayload,
"artifact.invalid": artifactValidationPayload.extend({ errors: z.array(z.unknown()) }),
"artifact.timeout": artifactWaitPayload,
// Approval gates.
"approval.requested": approvalRequestedPayload,
"approval.resolved": approvalResolvedPayload,
// Session lifecycle.
"session.created": sessionBasePayload.extend({ backend: nonEmptyString }),
"session.ready": sessionRecoveryPayload,
"session.busy": sessionPromptPayload,
"session.idle": sessionPromptPayload,
"session.crashed": sessionRecoveryPayload,
"session.recovered": sessionRecoveryPayload,
"session.failed": sessionBasePayload,
// Commands.
"command.started": commandPayload,
"command.completed": commandPayload,
"command.failed": commandPayload,
// Review, findings and backtests.
"review.batch_recorded": reviewBatchRecordedPayload,
"finding.verifier_resolved": findingVerifierResolvedPayload,
"backtest.iteration_started": backtestIterationPayload,
"backtest.iteration_completed": backtestIterationPayload,
"backtest.objective_evaluated": backtestIterationPayload,
} satisfies Record<RunEventType, z.ZodTypeAny>) as Readonly<Record<RunEventType, z.ZodTypeAny>>;
/**
 * Schema for a run event envelope (`type` + `payload`).
 *
 * `type` must be a known run event type. `payload` is accepted as `unknown` by the
 * object schema and is then checked in `superRefine` against the schema registered
 * for that type in `RunEventPayloadSchemas`.
 * The refinement only reports issues; it does not replace `payload` with the
 * payload schema's parsed output.
 */
export const RunEvent = z
.object({
type: RunEventType,
payload: z.unknown(),
})
.superRefine((event, ctx) => {
const payload = RunEventPayloadSchemas[event.type].safeParse(event.payload);
if (payload.success) {
return;
}
// Forward every payload issue, prefixing its path so it points under `payload`.
for (const issue of payload.error.issues) {
ctx.addIssue({
...issue,
path: ["payload", ...issue.path],
});
}
});
export type RunEvent = z.infer<typeof RunEvent>; export type RunEvent = z.infer<typeof RunEvent>;

View File

@@ -8,8 +8,8 @@
"types": "./dist/index.d.ts", "types": "./dist/index.d.ts",
"scripts": { "scripts": {
"build": "tsup src/index.ts --format esm,cjs --clean && tsc -p tsconfig.build.json", "build": "tsup src/index.ts --format esm,cjs --clean && tsc -p tsconfig.build.json",
"typecheck": "tsc -b --noEmit", "typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit",
"test": "vitest run" "test": "cd ../.. && vitest run --project packages/db"
}, },
"dependencies": { "dependencies": {
"@devflow/core": "workspace:*", "@devflow/core": "workspace:*",

View File

@@ -1,3 +1,4 @@
export { createDbClient, type DbClient } from "./client.js"; export { createDbClient, type DbClient } from "./client.js";
export * from "./repositories/run-event.js";
export * from "./repositories/transcript.js"; export * from "./repositories/transcript.js";
export * from "./schema/index.js"; export * from "./schema/index.js";

View File

@@ -0,0 +1,278 @@
import {
DevflowError,
RunEventPayloadSchemas,
RunEventType,
type RunEventType as RunEventTypeName,
canonicalize,
} from "@devflow/core";
import { and, desc, eq, sql } from "drizzle-orm";
import type { DbClient } from "../client.js";
import { runEvents, runPhases } from "../schema/index.js";
// The database handle exposed on a DbClient.
type Database = DbClient["db"];
// The transaction handle type: the first argument that `Database.transaction` passes to its callback.
type TransactionDatabase = Parameters<Parameters<Database["transaction"]>[0]>[0];
/** Input accepted by `RunEventRepository.append` / `appendInTransaction`. */
export interface AppendRunEventInput {
/** Run the event belongs to. */
runId: string;
/** Phase the event belongs to; required for phase.*, artifact.* and review.batch_recorded events. */
phaseId?: string;
/** Event type; re-validated at append time. */
type: RunEventTypeName;
/** Event payload; validated against the payload schema registered for `type`. */
payload: Record<string, unknown>;
/** Non-empty key; must equal the key the repository derives from the event's type and identifying fields. */
idempotencyKey: string;
}
/** Shape of a run event row as returned by the repository. */
export interface RunEventRow {
/** Row identifier. */
id: bigint;
/** Run the event belongs to. */
runId: string;
/** Phase the event belongs to, or null when none was recorded. */
phaseId: string | null;
/** Per-run sequence number; the repository assigns the run's latest seq + 1, starting at 1. */
seq: bigint;
/** Event type as stored. */
type: string;
/** Stored payload; typed as unknown when read back. */
payload: unknown;
/** Idempotency key the event was appended with. */
idempotencyKey: string;
/** Event timestamp — presumably set by the database on insert, since the repository does not supply it; verify against the schema. */
ts: Date;
}
/**
 * Appends validated events to a run's event log, treating a repeated
 * idempotency key as a replay of the already-stored event.
 */
export class RunEventRepository {
constructor(private readonly db: Database) {}
/**
 * Appends one event in a transaction opened on the repository's database,
 * delegating all checks to `appendInTransaction`.
 */
async append(input: AppendRunEventInput): Promise<RunEventRow> {
return this.db.transaction(async (tx) => this.appendInTransaction(tx, input));
}
/**
 * Appends one event using a transaction supplied by the caller, so the event
 * can be committed together with other writes made on `tx`.
 *
 * @param tx - Open transaction handle; every query below runs on it.
 * @param input - Event to append.
 * @returns The inserted row, or the existing row when this run already has an
 *   event under the same idempotency key with matching type, phase and payload.
 * @throws DevflowError (`fatal` / `internal_state_corruption`) when a
 *   phase-scoped event has no phase id, the idempotency key is empty or does
 *   not match the derived key, the phase does not belong to the run, a replay
 *   differs from the stored event, or the insert returns no row.
 *   Failures from the type/payload schema `parse` calls are not caught here.
 */
async appendInTransaction(
tx: TransactionDatabase,
input: AppendRunEventInput,
): Promise<RunEventRow> {
// Validate the type, then the payload against the schema registered for that type.
const type = RunEventType.parse(input.type);
const payload = RunEventPayloadSchemas[type].parse(input.payload) as Record<string, unknown>;
if (isPhaseScopedEvent(type) && input.phaseId === undefined) {
throw new DevflowError("Run event phase id is required for phase-scoped event", {
class: "fatal",
code: "internal_state_corruption",
runId: input.runId,
});
}
if (input.idempotencyKey.length === 0) {
throw new DevflowError("Run event idempotency key is required", {
class: "fatal",
code: "internal_state_corruption",
runId: input.runId,
...(input.phaseId === undefined ? {} : { phaseId: input.phaseId }),
});
}
// The caller-supplied key must equal the key derived from the event's own identifying fields.
const expectedIdempotencyKey = expectedRunEventIdempotencyKey(input, type, payload);
if (expectedIdempotencyKey !== undefined && input.idempotencyKey !== expectedIdempotencyKey) {
throw new DevflowError("Run event idempotency key does not match event contract", {
class: "fatal",
code: "internal_state_corruption",
runId: input.runId,
...(input.phaseId === undefined ? {} : { phaseId: input.phaseId }),
});
}
// Transaction-scoped Postgres advisory lock keyed on the run id. The duplicate
// lookup, seq read and insert below all happen while it is held.
await tx.execute(
sql`SELECT pg_advisory_xact_lock(hashtext('devflow:run-events'), hashtext(${input.runId}))`,
);
// When a phase id is given, it must reference a phase row of this same run.
if (input.phaseId !== undefined) {
const [phase] = await tx
.select({ id: runPhases.id })
.from(runPhases)
.where(and(eq(runPhases.id, input.phaseId), eq(runPhases.runId, input.runId)))
.limit(1);
if (phase === undefined) {
throw new DevflowError("Run event phase does not belong to run", {
class: "fatal",
code: "internal_state_corruption",
runId: input.runId,
phaseId: input.phaseId,
});
}
}
// Idempotent replay: if this run already has an event under the key, return it
// after checking that it matches what is being appended now.
const existing = await tx
.select()
.from(runEvents)
.where(
and(eq(runEvents.runId, input.runId), eq(runEvents.idempotencyKey, input.idempotencyKey)),
)
.limit(1);
if (existing[0] !== undefined) {
assertIdempotentReplayMatches(input, type, payload, existing[0]);
return existing[0];
}
// Allocate the next per-run sequence number (1 for the run's first event).
const latest = await tx
.select({ seq: runEvents.seq })
.from(runEvents)
.where(eq(runEvents.runId, input.runId))
.orderBy(desc(runEvents.seq))
.limit(1);
const seq = (latest[0]?.seq ?? 0n) + 1n;
const inserted = await tx
.insert(runEvents)
.values({
runId: input.runId,
phaseId: input.phaseId,
seq,
type,
payload,
idempotencyKey: input.idempotencyKey,
})
.returning();
// `returning()` yields an array; an empty result is treated as corruption rather than returned.
const event = inserted[0];
if (event === undefined) {
throw new DevflowError("Run event insert returned no row", {
class: "fatal",
code: "internal_state_corruption",
runId: input.runId,
...(input.phaseId === undefined ? {} : { phaseId: input.phaseId }),
});
}
return event;
}
}
/**
 * Reports whether an event type must be recorded against a phase.
 *
 * Phase-scoped types are every `phase.*` and `artifact.*` event plus
 * `review.batch_recorded`.
 */
function isPhaseScopedEvent(type: RunEventTypeName): boolean {
  if (type === "review.batch_recorded") {
    return true;
  }
  return ["phase.", "artifact."].some((prefix) => type.startsWith(prefix));
}
/**
 * Verifies that an append reusing an existing idempotency key describes the
 * same event as the stored row.
 *
 * The type must be equal, the canonical forms of both payloads must be equal,
 * and, for phase-scoped event types only, the phase id must be equal (a
 * missing input phase id is compared as null).
 *
 * @throws DevflowError (`fatal` / `internal_state_corruption`) on any mismatch.
 */
function assertIdempotentReplayMatches(
  input: AppendRunEventInput,
  type: RunEventTypeName,
  payload: Record<string, unknown>,
  existing: RunEventRow,
) {
  const typeMatches = existing.type === type;
  const phaseMatches = isPhaseScopedEvent(type)
    ? existing.phaseId === (input.phaseId ?? null)
    : true;
  const storedCanonical = canonicalize(normalizeJson(existing.payload));
  const incomingCanonical = canonicalize(payload);
  const payloadMatches = storedCanonical === incomingCanonical;

  if (!typeMatches || !phaseMatches || !payloadMatches) {
    const phaseContext = input.phaseId === undefined ? {} : { phaseId: input.phaseId };
    throw new DevflowError("Run event idempotency key replay does not match existing event", {
      class: "fatal",
      code: "internal_state_corruption",
      runId: input.runId,
      ...phaseContext,
    });
  }
}
/**
 * Derives the idempotency key an event is required to carry.
 *
 * Every key is a `:`-joined sequence that starts with the event type and
 * continues with the fields that identify the event (run id, phase id,
 * attempt, dedup key, session id, ...), read from `input` and `payload`.
 *
 * @returns The expected key, or `undefined` for a type with no contract here.
 * @throws DevflowError when a field needed for the key is missing or has the
 *   wrong type, or when a phase-scoped key is requested without a phase id.
 */
function expectedRunEventIdempotencyKey(
  input: AppendRunEventInput,
  type: RunEventTypeName,
  payload: Record<string, unknown>,
): string | undefined {
  // Small accessors so each case lists only the fields that make up its key.
  // Arguments are evaluated left to right, so a missing field is reported in
  // the same order as the key is written.
  const key = (...parts: Array<string | number>): string => parts.join(":");
  const text = (field: string): string => stringPayload(payload, field);
  const int = (field: string): number => numberPayload(payload, field);
  const phase = (): string => requiredPhaseId(input);

  switch (type) {
    // One event of each of these types per run.
    case "run.created":
    case "run.started":
    case "run.completed":
    case "run.failed":
    case "run.aborted":
      return key(type, input.runId);

    // Pause/resume are keyed by run and cause.
    case "run.paused":
    case "run.resumed":
      return key(type, input.runId, text("cause"));

    // Phase lifecycle events are keyed by phase and attempt.
    case "phase.started":
    case "phase.completed":
    case "phase.failed":
    case "phase.skipped":
      return key(type, phase(), int("attempt"));

    case "prompt.sent":
    case "prompt.repaired":
      return key(type, text("dedupKey"));

    case "artifact.expected":
    case "artifact.timeout":
      return key(type, phase(), int("attempt"), text("path"));

    case "artifact.validated":
    case "artifact.invalid":
      return key(type, phase(), text("path"), text("hash"));

    case "approval.requested":
      return key("approval.requested", text("approvalIdempotencyKey"));

    case "approval.resolved":
      return key("approval.resolved", text("approvalRequestId"), text("action"));

    case "session.created":
    case "session.failed":
      return key(type, text("sessionId"));

    case "session.busy":
    case "session.idle":
      return key(type, text("sessionId"), text("dedupKey"));

    case "session.ready":
    case "session.crashed":
    case "session.recovered":
      return key(type, text("sessionId"), int("recoveryAttempts"));

    case "command.started":
    case "command.completed":
    case "command.failed":
      return key(type, text("commandId"));

    case "review.batch_recorded":
      return key("review.batch_recorded", phase(), text("reviewerRole"), int("attempt"));

    case "finding.verifier_resolved":
      return key("finding.verifier_resolved", text("findingId"));

    case "backtest.iteration_started":
    case "backtest.iteration_completed":
    case "backtest.objective_evaluated":
      return key(type, text("iterationId"));

    default:
      return undefined;
  }
}
/**
 * Returns the phase id of an append input.
 *
 * @throws DevflowError (`fatal` / `internal_state_corruption`) when the input has no phase id.
 */
function requiredPhaseId(input: AppendRunEventInput): string {
  const phaseId = input.phaseId;
  if (phaseId !== undefined) {
    return phaseId;
  }
  throw new DevflowError("Run event phase id is required for idempotency key", {
    class: "fatal",
    code: "internal_state_corruption",
    runId: input.runId,
  });
}
/**
 * Reads a payload field that must be a non-empty string.
 *
 * @param payload - Event payload to read from.
 * @param key - Name of the field.
 * @returns The field's string value.
 * @throws DevflowError (`fatal` / `internal_state_corruption`) when the field
 *   is absent, is not a string, or is the empty string.
 */
function stringPayload(payload: Record<string, unknown>, key: string): string {
  const candidate = payload[key];
  if (typeof candidate === "string" && candidate.length > 0) {
    return candidate;
  }
  throw new DevflowError(`Run event payload is missing string field ${key}`, {
    class: "fatal",
    code: "internal_state_corruption",
  });
}
/**
 * Reads a payload field that must be an integer number.
 *
 * @param payload - Event payload to read from.
 * @param key - Name of the field.
 * @returns The field's integer value.
 * @throws DevflowError (`fatal` / `internal_state_corruption`) when the field
 *   is absent, is not a number, or is not an integer.
 */
function numberPayload(payload: Record<string, unknown>, key: string): number {
  const candidate = payload[key];
  if (typeof candidate === "number" && Number.isInteger(candidate)) {
    return candidate;
  }
  throw new DevflowError(`Run event payload is missing integer field ${key}`, {
    class: "fatal",
    code: "internal_state_corruption",
  });
}
/**
 * Recursively rebuilds a JSON-like value.
 *
 * Arrays are mapped element by element, objects are rebuilt from their own
 * enumerable string-keyed entries, and every other value (including null)
 * is returned as-is.
 */
function normalizeJson(value: unknown): unknown {
  if (value === null || typeof value !== "object") {
    return value;
  }
  if (Array.isArray(value)) {
    return value.map((item) => normalizeJson(item));
  }
  const rebuilt: Array<[string, unknown]> = [];
  for (const [key, child] of Object.entries(value as Record<string, unknown>)) {
    rebuilt.push([key, normalizeJson(child)]);
  }
  return Object.fromEntries(rebuilt);
}

View File

@@ -0,0 +1,20 @@
{
"name": "@devflow/run-engine",
"version": "0.0.0",
"private": true,
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
"types": "./dist/index.d.ts",
"scripts": {
"build": "tsup src/index.ts --format esm,cjs --clean && tsc -p tsconfig.build.json",
"typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit",
"test": "cd ../.. && vitest run --project packages/run-engine"
},
"dependencies": {
"@devflow/core": "workspace:*",
"@devflow/db": "workspace:*",
"@devflow/session": "workspace:*",
"drizzle-orm": "0.45.2"
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,2 @@
export * from "./fake-phase-harness.js";
export * from "./run-event-repository.js";

View File

@@ -0,0 +1 @@
export { RunEventRepository } from "@devflow/db";

View File

@@ -0,0 +1,10 @@
{
"extends": "./tsconfig.json",
"compilerOptions": {
"composite": false,
"emitDeclarationOnly": true,
"noEmit": false
},
"references": [],
"exclude": ["src/**/*.test.ts"]
}

View File

@@ -0,0 +1,10 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"rootDir": "src",
"outDir": "dist",
"types": ["node", "vitest"]
},
"include": ["src/**/*.ts"],
"references": [{ "path": "../core" }, { "path": "../db" }, { "path": "../session" }]
}

View File

@@ -9,7 +9,7 @@
"scripts": { "scripts": {
"build": "tsup src/index.ts --format esm,cjs --clean && tsc -p tsconfig.build.json", "build": "tsup src/index.ts --format esm,cjs --clean && tsc -p tsconfig.build.json",
"typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit", "typecheck": "tsc -p ../../tsconfig.typecheck.json --noEmit",
"test": "vitest run" "test": "cd ../.. && vitest run --project packages/session"
}, },
"dependencies": { "dependencies": {
"@devflow/core": "workspace:*" "@devflow/core": "workspace:*"

View File

@@ -97,7 +97,25 @@ describe("FakeSessionAdapter", () => {
expect(chunks.every((chunk, index) => chunk.seq === BigInt(index + 1))).toBe(true); expect(chunks.every((chunk, index) => chunk.seq === BigInt(index + 1))).toBe(true);
}); });
it("refuses duplicate prompt dedup keys for the same session", async () => { it("classifies unsupported backend as human-required backend_unavailable", async () => {
const fixtureRoot = makeFixtureRoot();
tempRoots.push(fixtureRoot);
const adapter = new FakeSessionAdapter({ fixtureRoot, writeDelayMs: 0 });
await expect(
adapter.start({
runId,
roleId: "implementer",
backend: "codex",
cwd: fixtureRoot,
}),
).rejects.toMatchObject({
class: "human_required",
code: "backend_unavailable",
});
});
it("treats duplicate prompt dedup keys as idempotent success without reprocessing", async () => {
const fixtureRoot = makeFixtureRoot(); const fixtureRoot = makeFixtureRoot();
tempRoots.push(fixtureRoot); tempRoots.push(fixtureRoot);
const adapter = new FakeSessionAdapter({ fixtureRoot, writeDelayMs: 0 }); const adapter = new FakeSessionAdapter({ fixtureRoot, writeDelayMs: 0 });
@@ -116,13 +134,12 @@ describe("FakeSessionAdapter", () => {
await adapter.sendPrompt(handle, first); await adapter.sendPrompt(handle, first);
await expect(adapter.sendPrompt(handle, duplicate)).rejects.toMatchObject({ await expect(adapter.sendPrompt(handle, duplicate)).resolves.toEqual({ promptId: dedupKey });
code: "duplicate_prompt_dedup_key",
});
await waitForFile(first.expectedArtifact); await waitForFile(first.expectedArtifact);
expect(existsSync(duplicate.expectedArtifact)).toBe(false);
}); });
it("preserves prompt dedup history across crash and rebootstrap recovery", async () => { it("records dedup history only after a fake prompt is accepted", async () => {
const fixtureRoot = makeFixtureRoot(); const fixtureRoot = makeFixtureRoot();
tempRoots.push(fixtureRoot); tempRoots.push(fixtureRoot);
const adapter = new FakeSessionAdapter({ fixtureRoot, writeDelayMs: 0 }); const adapter = new FakeSessionAdapter({ fixtureRoot, writeDelayMs: 0 });
@@ -141,7 +158,7 @@ describe("FakeSessionAdapter", () => {
code: "prompt_send_transient", code: "prompt_send_transient",
}); });
await expect(adapter.sendPrompt(handle, crash)).rejects.toMatchObject({ await expect(adapter.sendPrompt(handle, crash)).rejects.toMatchObject({
code: "duplicate_prompt_dedup_key", code: "prompt_send_transient",
}); });
const ok = envelope({ dedupKey: "d".repeat(64) }); const ok = envelope({ dedupKey: "d".repeat(64) });
@@ -150,9 +167,7 @@ describe("FakeSessionAdapter", () => {
await waitForFile(ok.expectedArtifact); await waitForFile(ok.expectedArtifact);
await adapter.rebootstrap(handle); await adapter.rebootstrap(handle);
await expect(adapter.sendPrompt(handle, ok)).rejects.toMatchObject({ await expect(adapter.sendPrompt(handle, ok)).resolves.toEqual({ promptId: "d".repeat(64) });
code: "duplicate_prompt_dedup_key",
});
}); });
it("rejects prompts whose run or role do not match the session", async () => { it("rejects prompts whose run or role do not match the session", async () => {
@@ -207,6 +222,26 @@ describe("FakeSessionAdapter", () => {
expect(existsSync(prompt.expectedArtifact)).toBe(false); expect(existsSync(prompt.expectedArtifact)).toBe(false);
}); });
it("does not record dedup history when fixture resolution fails", async () => {
const fixtureRoot = mkdtempSync(join(tmpdir(), "devflow-empty-fake-fixtures-"));
tempRoots.push(fixtureRoot);
const adapter = new FakeSessionAdapter({ fixtureRoot, writeDelayMs: 0 });
const handle = await adapter.start({
runId,
roleId: "implementer",
backend: "fake",
cwd: fixtureRoot,
});
const missing = envelope({ dedupKey: "f".repeat(64) });
await expect(adapter.sendPrompt(handle, missing)).rejects.toMatchObject({
code: "fake_fixture_missing",
});
await expect(adapter.sendPrompt(handle, missing)).rejects.toMatchObject({
code: "fake_fixture_missing",
});
});
it("supports invalid, timeout, and crash sentinel scenarios", async () => { it("supports invalid, timeout, and crash sentinel scenarios", async () => {
const fixtureRoot = makeFixtureRoot(); const fixtureRoot = makeFixtureRoot();
tempRoots.push(fixtureRoot); tempRoots.push(fixtureRoot);

View File

@@ -21,7 +21,7 @@ import type {
export interface FakeSessionAdapterOptions { export interface FakeSessionAdapterOptions {
fixtureRoot?: string; fixtureRoot?: string;
writeDelayMs?: number; writeDelayMs?: number | ((envelope: PromptEnvelope) => number);
sessionIdFactory?: () => string; sessionIdFactory?: () => string;
now?: () => Date; now?: () => Date;
} }
@@ -40,14 +40,16 @@ interface FakeSessionRecord {
export class FakeSessionAdapter implements SessionAdapter { export class FakeSessionAdapter implements SessionAdapter {
private readonly fixtureRoot: string; private readonly fixtureRoot: string;
private readonly writeDelayMs: number; private readonly writeDelayMs: (envelope: PromptEnvelope) => number;
private readonly sessionIdFactory: () => string; private readonly sessionIdFactory: () => string;
private readonly now: () => Date; private readonly now: () => Date;
private readonly sessions = new Map<string, FakeSessionRecord>(); private readonly sessions = new Map<string, FakeSessionRecord>();
constructor(options: FakeSessionAdapterOptions = {}) { constructor(options: FakeSessionAdapterOptions = {}) {
this.fixtureRoot = options.fixtureRoot ?? defaultFixtureRoot(); this.fixtureRoot = options.fixtureRoot ?? defaultFixtureRoot();
this.writeDelayMs = options.writeDelayMs ?? 50; const writeDelayMs = options.writeDelayMs;
this.writeDelayMs =
typeof writeDelayMs === "function" ? writeDelayMs : () => writeDelayMs ?? 50;
this.sessionIdFactory = options.sessionIdFactory ?? randomUUID; this.sessionIdFactory = options.sessionIdFactory ?? randomUUID;
this.now = options.now ?? (() => new Date()); this.now = options.now ?? (() => new Date());
} }
@@ -55,7 +57,7 @@ export class FakeSessionAdapter implements SessionAdapter {
async start(input: StartInput): Promise<SessionHandle> { async start(input: StartInput): Promise<SessionHandle> {
if (input.backend !== "fake") { if (input.backend !== "fake") {
throw new DevflowError("FakeSessionAdapter only supports the fake backend", { throw new DevflowError("FakeSessionAdapter only supports the fake backend", {
class: "fatal", class: "human_required",
code: "backend_unavailable", code: "backend_unavailable",
runId: input.runId, runId: input.runId,
}); });
@@ -87,15 +89,10 @@ export class FakeSessionAdapter implements SessionAdapter {
}); });
} }
if (record.sentDedupKeys.has(envelope.dedupKey)) { if (record.sentDedupKeys.has(envelope.dedupKey)) {
throw new DevflowError("Duplicate prompt dedup key refused by fake session", { return { promptId: envelope.dedupKey };
class: "recoverable",
code: "duplicate_prompt_dedup_key",
runId: record.runId,
});
} }
const scenarioName = scenarioFromInstructions(envelope.instructions); const scenarioName = scenarioFromInstructions(envelope.instructions);
record.sentDedupKeys.add(envelope.dedupKey);
if (scenarioName === "crash") { if (scenarioName === "crash") {
this.appendTranscript(record, `[fake] received prompt ${envelope.uuid}; crashing`); this.appendTranscript(record, `[fake] received prompt ${envelope.uuid}; crashing`);
@@ -107,6 +104,7 @@ export class FakeSessionAdapter implements SessionAdapter {
} }
if (scenarioName === "timeout") { if (scenarioName === "timeout") {
record.sentDedupKeys.add(envelope.dedupKey);
this.appendTranscript(record, `[fake] received prompt ${envelope.uuid}; timeout`); this.appendTranscript(record, `[fake] received prompt ${envelope.uuid}; timeout`);
return { promptId: envelope.dedupKey }; return { promptId: envelope.dedupKey };
} }
@@ -121,9 +119,12 @@ export class FakeSessionAdapter implements SessionAdapter {
envelope.runId, envelope.runId,
); );
record.sentDedupKeys.add(envelope.dedupKey);
const writeDelayMs = this.writeDelayMs(envelope);
this.appendTranscript( this.appendTranscript(
record, record,
`[fake] received prompt ${envelope.uuid}; will write ${envelope.expectedArtifact} in ${this.writeDelayMs}ms`, `[fake] received prompt ${envelope.uuid}; will write ${envelope.expectedArtifact} in ${writeDelayMs}ms`,
); );
const timer = setTimeout(() => { const timer = setTimeout(() => {
record.timers.delete(timer); record.timers.delete(timer);
@@ -145,7 +146,7 @@ export class FakeSessionAdapter implements SessionAdapter {
return; return;
} }
this.appendTranscript(record, `[fake] wrote artifact ${envelope.expectedArtifact}`); this.appendTranscript(record, `[fake] wrote artifact ${envelope.expectedArtifact}`);
}, this.writeDelayMs); }, writeDelayMs);
record.timers.add(timer); record.timers.add(timer);
return { promptId: envelope.dedupKey }; return { promptId: envelope.dedupKey };

15
pnpm-lock.yaml generated
View File

@@ -100,6 +100,21 @@ importers:
specifier: 8.20.0 specifier: 8.20.0
version: 8.20.0 version: 8.20.0
packages/run-engine:
dependencies:
'@devflow/core':
specifier: workspace:*
version: link:../core
'@devflow/db':
specifier: workspace:*
version: link:../db
'@devflow/session':
specifier: workspace:*
version: link:../session
drizzle-orm:
specifier: 0.45.2
version: 0.45.2(@types/pg@8.20.0)(pg@8.20.0)
packages/session: packages/session:
dependencies: dependencies:
'@devflow/core': '@devflow/core':

View File

@@ -8,6 +8,7 @@
"references": [ "references": [
{ "path": "./packages/core" }, { "path": "./packages/core" },
{ "path": "./packages/db" }, { "path": "./packages/db" },
{ "path": "./packages/run-engine" },
{ "path": "./packages/session" }, { "path": "./packages/session" },
{ "path": "./apps/cli" } { "path": "./apps/cli" }
] ]

View File

@@ -10,6 +10,7 @@
"paths": { "paths": {
"@devflow/core": ["packages/core/src/index.ts"], "@devflow/core": ["packages/core/src/index.ts"],
"@devflow/db": ["packages/db/src/index.ts"], "@devflow/db": ["packages/db/src/index.ts"],
"@devflow/run-engine": ["packages/run-engine/src/index.ts"],
"@devflow/session": ["packages/session/src/index.ts"] "@devflow/session": ["packages/session/src/index.ts"]
} }
}, },

View File

@@ -1,39 +1,31 @@
import { resolve } from "node:path";
import { fileURLToPath } from "node:url";
import { defineWorkspace } from "vitest/config"; import { defineWorkspace } from "vitest/config";
// Absolute path of the directory that contains this workspace config file.
const root = fileURLToPath(new URL(".", import.meta.url));

/** Absolute path to the source entry point of a workspace package under `packages/`. */
const packageEntry = (packageDir: string): string =>
  resolve(root, `packages/${packageDir}/src/index.ts`);

// Map each internal package specifier straight to its TypeScript source entry.
const alias = {
  "@devflow/core": packageEntry("core"),
  "@devflow/db": packageEntry("db"),
  "@devflow/run-engine": packageEntry("run-engine"),
  "@devflow/session": packageEntry("session"),
};
/**
 * Builds one Vitest workspace project entry that runs in the Node environment.
 *
 * @param name - Project name for the entry.
 * @param include - Glob patterns selecting the test files that belong to the project.
 * @returns A project config carrying the shared module `alias` map and the test settings.
 */
function nodeProject(name: string, include: string[]) {
  const test = { name, include, environment: "node" };
  return { resolve: { alias }, test };
}
export default defineWorkspace([ export default defineWorkspace([
{ nodeProject("root", ["tests/**/*.test.ts"]),
test: { nodeProject("packages/db", ["packages/db/src/**/*.test.ts"]),
name: "root", nodeProject("packages/core", ["packages/core/src/**/*.test.ts"]),
include: ["tests/**/*.test.ts"], nodeProject("packages/session", ["packages/session/src/**/*.test.ts"]),
environment: "node", nodeProject("packages/run-engine", ["packages/run-engine/src/**/*.test.ts"]),
}, nodeProject("apps/cli", ["apps/cli/src/**/*.test.ts"]),
},
{
test: {
name: "packages/db",
include: ["packages/db/src/**/*.test.ts"],
environment: "node",
},
},
{
test: {
name: "packages/core",
include: ["packages/core/src/**/*.test.ts"],
environment: "node",
},
},
{
test: {
name: "packages/session",
include: ["packages/session/src/**/*.test.ts"],
environment: "node",
},
},
{
test: {
name: "apps/cli",
include: ["apps/cli/src/**/*.test.ts"],
environment: "node",
},
},
]); ]);