feat(01-05): asset provenance validator + Zod sidecar schema + refused-sample fixture + PIPE-03 enforcement test (tmpdir-isolated)
- scripts/validate-assets.mjs: walks ASSETS_DIR (default 'assets'), requires every non-sidecar non-.gitkeep non-README file to carry a sibling <name>.provenance.json validating against Zod ProvenanceSchema (6 required fields per CLAUDE.md / AEST-08 + optional provenance_schema_version per RESEARCH Open Question #2). Excludes assets/__samples__/refused/ so the proof-of-gate fixture passes the gate. - assets/__samples__/refused/no-provenance.png: 1x1 transparent PNG with no sidecar; the gate-proof artifact per CONTEXT D-03. - scripts/validate-assets.test.ts: Vitest integration test covering both cases. Positive: real /assets/ tree must exit 0. Negative: per-test-run mkdtemp under os.tmpdir() with one orphan PNG; runs validator with ASSETS_DIR pointing at the tmpdir; asserts exit 1 + clear error message + cleanup in afterAll. No risk of polluting the real /assets/ tree (BLOCKER 2 fix). - vitest.config.ts: extend include glob to also pick up scripts/**/*.test.ts (Rule 3 blocking fix — without this the new test file is invisible to vitest). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env node
|
||||
// scripts/validate-assets.mjs — Phase 1 asset provenance gate (PIPE-03, AEST-08, AEST-09)
|
||||
//
|
||||
// Walks /assets/ (or process.env.ASSETS_DIR for tests), requires every non-sidecar
|
||||
// non-.gitkeep, non-README.md file to have a sibling <filename>.provenance.json validating against
|
||||
// ProvenanceSchema. Excludes /assets/__samples__/refused/ and
// /assets/__test_fixtures__/refused/ (which intentionally lack sidecars to prove the gate).
|
||||
//
|
||||
// Per CONTEXT D-03: minimum-viable. No curator workflow, no two-stage promotion,
|
||||
// no pre-commit hook. Sidecar + this script + CI is the entire pipeline.
|
||||
//
|
||||
// Per CONTEXT D-01: 6 required fields per CLAUDE.md provenance metadata.
|
||||
// Per RESEARCH Open Question #2: optional provenance_schema_version for Phase 5 fwd-compat.
|
||||
|
||||
import { readdir, readFile } from 'node:fs/promises';
|
||||
import { join, basename } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
|
||||
/**
 * Zod schema for the contents of a `<asset>.provenance.json` sidecar.
 *
 * Six fields are required (`model_id`, `checkpoint_hash`, `prompt`, `seed`,
 * `sampler`, `params`); `provenance_schema_version` is optional and, when
 * present, must be a positive integer.
 */
const ProvenanceSchema = z.object({
  model_id: z.string().min(1),
  checkpoint_hash: z.string().min(1),
  prompt: z.string().min(1),
  // A seed may be recorded as text or as a number. An empty string carries no
  // provenance information, so it is rejected like every other string field.
  seed: z.union([z.string().min(1), z.number()]),
  sampler: z.string().min(1),
  // Free-form generation parameters: any string-keyed object is accepted.
  params: z.record(z.string(), z.unknown()),
  provenance_schema_version: z.number().int().positive().optional(),
});
|
||||
|
||||
// Root directory to validate. Tests override it through the ASSETS_DIR
// environment variable. `||` (not `??`) is deliberate: an empty ASSETS_DIR=""
// must fall back to the default, otherwise the walk finds no directory, yields
// nothing, and the gate would pass with zero assets checked.
const ASSETS_DIR = process.env.ASSETS_DIR || 'assets';

// Refused-sample exclusion is relative to the *real* assets tree; tests pointing
// ASSETS_DIR at a tmpdir won't have these paths so the exclusion is harmless.
const REFUSED_PREFIXES = ['assets/__samples__/refused', 'assets/__test_fixtures__/refused'];
|
||||
|
||||
/**
 * Recursively yields the path of every non-directory entry beneath `dir`.
 *
 * A directory that does not exist is treated as empty; any other `readdir`
 * failure is rethrown to the caller.
 *
 * @param {string} dir - Directory to traverse.
 * @yields {string} `dir` joined with the relative location of each entry.
 */
async function* walk(dir) {
  let children = [];
  try {
    children = await readdir(dir, { withFileTypes: true });
  } catch (err) {
    // Only a missing directory is tolerated; `children` stays empty in that case.
    if (err.code !== 'ENOENT') throw err;
  }

  for (const child of children) {
    const childPath = join(dir, child.name);
    if (!child.isDirectory()) {
      yield childPath;
      continue;
    }
    yield* walk(childPath);
  }
}
|
||||
|
||||
/**
 * Converts every backslash in `p` to a forward slash so that path prefixes can
 * be compared with a single separator style.
 *
 * @param {string} p - Path that may contain backslashes.
 * @returns {string} The same path with `/` in place of each `\`.
 */
function normalizePath(p) {
  const segments = p.split('\\');
  return segments.join('/');
}
|
||||
|
||||
// Validation pass: every asset file under ASSETS_DIR must have a sibling
// `<file>.provenance.json` that parses as JSON and satisfies ProvenanceSchema.
// All problems are collected first so one run reports every offending asset.
const errors = [];
let assetCount = 0;

// True only for paths *inside* a refused directory. Matching on the trailing
// slash keeps look-alike siblings (e.g. `.../refused-extra/x.png` or
// `.../refused.png`) from silently bypassing the gate.
const isRefused = (normalizedPath) =>
  REFUSED_PREFIXES.some((prefix) => normalizedPath.startsWith(`${prefix}/`));

for await (const path of walk(ASSETS_DIR)) {
  const norm = normalizePath(path);
  if (isRefused(norm)) continue;
  // Sidecars, placeholder files and READMEs are not assets themselves.
  if (norm.endsWith('.provenance.json')) continue;
  if (basename(norm) === '.gitkeep') continue;
  if (basename(norm) === 'README.md') continue;

  assetCount++;
  const sidecar = path + '.provenance.json';
  try {
    const raw = await readFile(sidecar, 'utf8');
    const parsed = ProvenanceSchema.safeParse(JSON.parse(raw));
    if (!parsed.success) {
      errors.push(`${path}: provenance schema validation failed — ${parsed.error.message}`);
    }
  } catch (e) {
    // Reached for a missing/unreadable sidecar (e.code is set) and for
    // malformed JSON (no code, so the parser's message is reported instead).
    errors.push(`${path}: missing or unreadable provenance sidecar (${sidecar}): ${e.code ?? e.message}`);
  }
}

if (errors.length) {
  console.error('[provenance] validation failed:');
  for (const err of errors) console.error(' ' + err);
  // Non-zero exit is what makes CI fail the gate.
  process.exit(1);
}

console.log(`[provenance] all ${assetCount} assets carry valid provenance.`);
|
||||
Reference in New Issue
Block a user