#!/usr/bin/env node
// scripts/validate-assets.mjs — Phase 1 asset provenance gate (PIPE-03, AEST-08, AEST-09)
//
// Walks /assets/ (or process.env.ASSETS_DIR for tests) and requires every file that is
// not itself a sidecar, a .gitkeep, or a README.md to have a sibling .provenance.json
// validating against ProvenanceSchema. Excludes /assets/__samples__/refused/ and
// /assets/__test_fixtures__/refused/ (which intentionally lack sidecars to prove the gate).
//
// Per CONTEXT D-03: minimum-viable. No curator workflow, no two-stage promotion,
// no pre-commit hook. Sidecar + this script + CI is the entire pipeline.
//
// Per CONTEXT D-01: 6 required fields per CLAUDE.md provenance metadata.
// Per RESEARCH Open Question #2: optional provenance_schema_version for Phase 5 fwd-compat.

import { readdir, readFile } from 'node:fs/promises';
import { join, basename } from 'node:path';
import { z } from 'zod';

// Shape every *.provenance.json sidecar must satisfy: six required fields plus an
// optional schema version. `seed` accepts either a string or a number.
const ProvenanceSchema = z.object({
  model_id: z.string().min(1),
  checkpoint_hash: z.string().min(1),
  prompt: z.string().min(1),
  seed: z.union([z.string(), z.number()]),
  sampler: z.string().min(1),
  params: z.record(z.string(), z.unknown()),
  provenance_schema_version: z.number().int().positive().optional(),
});

// Root directory to scan. `||` (not `??`) is deliberate: an empty ASSETS_DIR (e.g. an
// unset variable expanded in CI) must fall back to the default. An empty path makes
// readdir fail with ENOENT, which the directory walk treats as "nothing to scan", so
// the gate would otherwise pass vacuously with zero assets checked.
const ASSETS_DIR = process.env.ASSETS_DIR || 'assets';

// Refused-sample exclusion is relative to the *real* assets tree; tests pointing
// ASSETS_DIR at a tmpdir won't have these paths so the exclusion is harmless.
// Directories whose contents are deliberately exempt from the gate. Entries are
// forward-slash paths without a trailing slash; they are matched on path-segment
// boundaries (see isRefused) so a sibling such as `refused-extra/` is NOT exempt.
const REFUSED_PREFIXES = ['assets/__samples__/refused', 'assets/__test_fixtures__/refused'];

/**
 * Recursively yields the path of every non-directory entry beneath `dir`.
 *
 * A directory that does not exist (ENOENT) yields nothing; any other readdir
 * failure is rethrown.
 *
 * @param {string} dir - Directory to descend into.
 * @yields {string} `dir` joined with each nested entry name.
 */
async function* walk(dir) {
  let entries;
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch (e) {
    if (e.code === 'ENOENT') return;
    throw e;
  }
  for (const entry of entries) {
    const path = join(dir, entry.name);
    if (entry.isDirectory()) {
      yield* walk(path);
    } else {
      yield path;
    }
  }
}

/**
 * Converts backslash separators to forward slashes so prefix and suffix checks
 * behave the same regardless of the platform's path separator.
 *
 * @param {string} p - Path as produced by `join`.
 * @returns {string} The path with every `\` replaced by `/`.
 */
function normalizePath(p) {
  return p.replaceAll('\\', '/');
}

/**
 * Reports whether a normalized path is one of the refused locations or lives inside one.
 *
 * A bare `startsWith(prefix)` would also match siblings like `refused-extra/x.png`
 * and silently exempt them from the gate, so a `/` boundary is required.
 *
 * @param {string} norm - Forward-slash path.
 * @returns {boolean} True when the path is exempt from validation.
 */
function isRefused(norm) {
  return REFUSED_PREFIXES.some((prefix) => norm === prefix || norm.startsWith(`${prefix}/`));
}

const errors = [];
let assetCount = 0;

for await (const path of walk(ASSETS_DIR)) {
  const norm = normalizePath(path);
  if (isRefused(norm)) continue;
  // Sidecars, .gitkeep placeholders and README.md files are not assets themselves.
  if (norm.endsWith('.provenance.json')) continue;
  if (basename(norm) === '.gitkeep') continue;
  if (basename(norm) === 'README.md') continue;

  assetCount++;
  const sidecar = `${path}.provenance.json`;
  try {
    const raw = await readFile(sidecar, 'utf8');
    const parsed = ProvenanceSchema.safeParse(JSON.parse(raw));
    if (!parsed.success) {
      errors.push(`${path}: provenance schema validation failed — ${parsed.error.message}`);
    }
  } catch (e) {
    // Reached for filesystem errors (which carry `code`, e.g. ENOENT) and for a
    // JSON.parse SyntaxError (no `code`, so its message is reported instead).
    errors.push(`${path}: missing or unreadable provenance sidecar (${sidecar}): ${e.code ?? e.message}`);
  }
}

if (errors.length) {
  console.error('[provenance] validation failed:');
  for (const err of errors) console.error(' ' + err);
  // Set the exit code rather than calling process.exit(): a hard exit can drop
  // stderr writes that are still pending when the stream is asynchronous.
  process.exitCode = 1;
} else {
  console.log(`[provenance] all ${assetCount} assets carry valid provenance.`);
}