Files
TheLastGarden/scripts/validate-assets.mjs
T
josh da3f55cb69 feat(01-05): asset provenance validator + Zod sidecar schema + refused-sample fixture + PIPE-03 enforcement test (tmpdir-isolated)
- scripts/validate-assets.mjs: walks ASSETS_DIR (default 'assets'), requires every
  non-sidecar non-.gitkeep non-README file to carry a sibling <name>.provenance.json
  validating against Zod ProvenanceSchema (6 required fields per CLAUDE.md / AEST-08
  + optional provenance_schema_version per RESEARCH Open Question #2). Excludes
  assets/__samples__/refused/ so the proof-of-gate fixture passes the gate.
- assets/__samples__/refused/no-provenance.png: 1x1 transparent PNG with no sidecar;
  the gate-proof artifact per CONTEXT D-03.
- scripts/validate-assets.test.ts: Vitest integration test covering both cases.
  Positive: real /assets/ tree must exit 0. Negative: per-test-run mkdtemp under
  os.tmpdir() with one orphan PNG; runs validator with ASSETS_DIR pointing at the
  tmpdir; asserts exit 1 + clear error message + cleanup in afterAll. No risk of
  polluting the real /assets/ tree (BLOCKER 2 fix).
- vitest.config.ts: extend include glob to also pick up scripts/**/*.test.ts (Rule 3
  blocking fix — without this the new test file is invisible to vitest).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 23:29:34 -04:00

85 lines
2.8 KiB
JavaScript

#!/usr/bin/env node
// scripts/validate-assets.mjs — Phase 1 asset provenance gate (PIPE-03, AEST-08, AEST-09)
//
// Walks /assets/ (or process.env.ASSETS_DIR for tests) and requires every file that
// is not itself a sidecar, a .gitkeep, or a README.md to have a sibling
// <filename>.provenance.json that validates against ProvenanceSchema. Excludes
// /assets/__samples__/refused/ (which intentionally lacks sidecars to prove the
// gate) and /assets/__test_fixtures__/refused/.
//
// Per CONTEXT D-03: minimum-viable. No curator workflow, no two-stage promotion,
// no pre-commit hook. Sidecar + this script + CI is the entire pipeline.
//
// Per CONTEXT D-01: 6 required fields per CLAUDE.md provenance metadata.
// Per RESEARCH Open Question #2: optional provenance_schema_version for Phase 5 fwd-compat.
import { readdir, readFile } from 'node:fs/promises';
import { join, basename } from 'node:path';
import { z } from 'zod';
// Shape of a <asset>.provenance.json sidecar.
// Six required fields (model_id, checkpoint_hash, prompt, seed, sampler, params)
// plus an optional positive-integer provenance_schema_version.
const nonEmptyText = z.string().min(1);
const seedValue = z.union([z.string(), z.number()]); // seed may be textual or numeric
const ProvenanceSchema = z.object({
  model_id: nonEmptyText,
  checkpoint_hash: nonEmptyText,
  prompt: nonEmptyText,
  seed: seedValue,
  sampler: nonEmptyText,
  // Free-form generation parameters: string keys, values of any type.
  params: z.record(z.string(), z.unknown()),
  provenance_schema_version: z.number().int().positive().optional(),
});
/**
 * Resolve the directory the validator walks.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - environment to read.
 * @returns {string} `env.ASSETS_DIR` when it is a non-empty string, otherwise 'assets'.
 */
function resolveAssetsDir(env = process.env) {
  // `||` rather than `??` on purpose: an ASSETS_DIR that is set but empty (e.g. a
  // blank CI variable) is not a usable directory. walk() treats a directory that
  // does not exist as an empty tree, so keeping '' would let the gate pass having
  // checked nothing. Treat empty the same as unset.
  return env.ASSETS_DIR || 'assets';
}
const ASSETS_DIR = resolveAssetsDir();
// Refused-sample exclusion is relative to the *real* assets tree; tests pointing
// ASSETS_DIR at a tmpdir won't have these paths so the exclusion is harmless.
const REFUSED_PREFIXES = ['assets/__samples__/refused', 'assets/__test_fixtures__/refused'];
/**
 * Recursively enumerate a directory tree.
 *
 * Yields the joined path of every entry that is not a directory; directories
 * are descended into rather than yielded. A directory that does not exist
 * (ENOENT) is treated as empty; any other readdir error is rethrown.
 *
 * @param {string} dir - directory to enumerate.
 * @yields {string} path of each non-directory entry, built with `join(dir, name)`.
 */
async function* walk(dir) {
  let children = [];
  try {
    children = await readdir(dir, { withFileTypes: true });
  } catch (err) {
    // Only "does not exist" is tolerated; it leaves `children` empty.
    if (err.code !== 'ENOENT') throw err;
  }
  for (const child of children) {
    const childPath = join(dir, child.name);
    if (!child.isDirectory()) {
      yield childPath;
      continue;
    }
    yield* walk(childPath);
  }
}
/**
 * Rewrite every backslash in a path as a forward slash so that path
 * comparisons can be done against '/'-separated strings.
 *
 * @param {string} p - path that may contain '\\' separators.
 * @returns {string} the same path with each '\\' replaced by '/'.
 */
function normalizePath(p) {
  return p.split('\\').join('/');
}
// Main pass: every file under ASSETS_DIR that is not exempt must have a sibling
// <file>.provenance.json that parses as JSON and satisfies ProvenanceSchema.
// Problems are collected so that one run reports all of them, not just the first.
const errors = [];
let assetCount = 0;
for await (const path of walk(ASSETS_DIR)) {
  const norm = normalizePath(path);
  // Match refused prefixes on a directory boundary. A bare startsWith(prefix)
  // would also exempt siblings such as ".../refused-v2/", letting them skip the gate.
  if (REFUSED_PREFIXES.some((prefix) => norm.startsWith(`${prefix}/`))) continue;
  // Sidecars are metadata, not assets.
  if (norm.endsWith('.provenance.json')) continue;
  const fileName = basename(norm);
  if (fileName === '.gitkeep' || fileName === 'README.md') continue;

  assetCount++;
  const sidecar = `${path}.provenance.json`;

  // Read, parse and validate as separate steps so each failure is reported as
  // what it actually is.
  let raw;
  try {
    raw = await readFile(sidecar, 'utf8');
  } catch (e) {
    errors.push(`${path}: missing or unreadable provenance sidecar (${sidecar}): ${e.code ?? e.message}`);
    continue;
  }

  let json;
  try {
    json = JSON.parse(raw);
  } catch (e) {
    errors.push(`${path}: provenance sidecar (${sidecar}) is not valid JSON: ${e.message}`);
    continue;
  }

  const parsed = ProvenanceSchema.safeParse(json);
  if (!parsed.success) {
    errors.push(`${path}: provenance schema validation failed — ${parsed.error.message}`);
  }
}

if (errors.length) {
  console.error('[provenance] validation failed:');
  for (const err of errors) console.error(` ${err}`);
  // Set the exit code instead of calling process.exit(): the process then ends
  // on its own once pending stderr writes have been flushed, so a long error
  // list is not cut short.
  process.exitCode = 1;
} else {
  console.log(`[provenance] all ${assetCount} assets carry valid provenance.`);
}