feat(01-05): asset provenance validator + Zod sidecar schema + refused-sample fixture + PIPE-03 enforcement test (tmpdir-isolated)

- scripts/validate-assets.mjs: walks ASSETS_DIR (default 'assets'), requires every
  non-sidecar non-.gitkeep non-README file to carry a sibling <name>.provenance.json
  validating against Zod ProvenanceSchema (6 required fields per CLAUDE.md / AEST-08
  + optional provenance_schema_version per RESEARCH Open Question #2). Excludes
  assets/__samples__/refused/ so the proof-of-gate fixture passes the gate.
- assets/__samples__/refused/no-provenance.png: 1x1 transparent PNG with no sidecar;
  the gate-proof artifact per CONTEXT D-03.
- scripts/validate-assets.test.ts: Vitest integration test covering both cases.
  Positive: real /assets/ tree must exit 0. Negative: per-test-run mkdtemp under
  os.tmpdir() with one orphan PNG; runs validator with ASSETS_DIR pointing at the
  tmpdir; asserts exit 1 + clear error message + cleanup in afterAll. No risk of
  polluting the real /assets/ tree (BLOCKER 2 fix).
- vitest.config.ts: extend include glob to also pick up scripts/**/*.test.ts (Rule 3
  blocking fix — without this the new test file is invisible to vitest).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-08 23:29:34 -04:00
parent 1e99356b27
commit da3f55cb69
5 changed files with 142 additions and 1 deletions
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 68 B

+84
View File
@@ -0,0 +1,84 @@
#!/usr/bin/env node
// scripts/validate-assets.mjs — Phase 1 asset provenance gate (PIPE-03, AEST-08, AEST-09)
//
// Walks /assets/ (or process.env.ASSETS_DIR for tests) and requires every file other
// than sidecars, .gitkeep, and README.md to have a sibling <filename>.provenance.json
// validating against ProvenanceSchema. Excludes assets/__samples__/refused/ (which
// intentionally lacks sidecars to prove the gate) and assets/__test_fixtures__/refused/.
//
// Per CONTEXT D-03: minimum-viable. No curator workflow, no two-stage promotion,
// no pre-commit hook. Sidecar + this script + CI is the entire pipeline.
//
// Per CONTEXT D-01: 6 required fields per CLAUDE.md provenance metadata.
// Per RESEARCH Open Question #2: optional provenance_schema_version for Phase 5 fwd-compat.
import { readdir, readFile } from 'node:fs/promises';
import { join, basename } from 'node:path';
import { z } from 'zod';
// Shared building block: a string that must contain at least one character.
const nonEmptyString = z.string().min(1);

// Shape of a `<asset>.provenance.json` sidecar: six required fields plus an
// optional positive-integer schema version.
const ProvenanceSchema = z.object({
  model_id: nonEmptyString,
  checkpoint_hash: nonEmptyString,
  prompt: nonEmptyString,
  // Seeds may be recorded either as text or as a number.
  seed: z.union([z.string(), z.number()]),
  sampler: nonEmptyString,
  // Free-form generation parameters keyed by name; values are not inspected.
  params: z.record(z.string(), z.unknown()),
  provenance_schema_version: z.number().int().positive().optional(),
});

// Root of the tree to validate; overridable through the environment (used by tests).
const ASSETS_DIR = process.env.ASSETS_DIR ?? 'assets';

// Directories exempt from the gate, expressed relative to the *real* assets tree.
// When ASSETS_DIR points somewhere else (e.g. a tmpdir) no walked path begins with
// these prefixes, so the exemption simply never triggers.
const REFUSED_PREFIXES = [
  'assets/__samples__/refused',
  'assets/__test_fixtures__/refused',
];
async function* walk(dir) {
let entries;
try {
entries = await readdir(dir, { withFileTypes: true });
} catch (e) {
if (e.code === 'ENOENT') return;
throw e;
}
for (const entry of entries) {
const path = join(dir, entry.name);
if (entry.isDirectory()) {
yield* walk(path);
} else {
yield path;
}
}
}
/**
 * Converts every backslash in `p` to a forward slash so that prefix and suffix
 * checks behave the same regardless of the platform's path separator.
 */
function normalizePath(p) {
  const forwardSlashed = p.replace(/\\/g, '/');
  return forwardSlashed;
}
// One human-readable entry per asset that fails the gate.
const errors = [];
// Number of files that were actually subject to the provenance requirement.
let assetCount = 0;

for await (const path of walk(ASSETS_DIR)) {
  const norm = normalizePath(path);

  // Exempt the refused-sample directories. Match on a path-segment boundary:
  // a bare startsWith() would also exempt siblings such as
  // `assets/__samples__/refused-extra/`, letting unprovenanced files slip past.
  if (REFUSED_PREFIXES.some((r) => norm === r || norm.startsWith(r + '/'))) continue;
  // Sidecars describe assets; they are not assets themselves.
  if (norm.endsWith('.provenance.json')) continue;
  // Housekeeping files that never need provenance.
  if (basename(norm) === '.gitkeep') continue;
  if (basename(norm) === 'README.md') continue;

  assetCount++;
  const sidecar = path + '.provenance.json';
  try {
    const raw = await readFile(sidecar, 'utf8');
    const parsed = ProvenanceSchema.safeParse(JSON.parse(raw));
    if (!parsed.success) {
      errors.push(`${path}: provenance schema validation failed — ${parsed.error.message}`);
    }
  } catch (e) {
    // Reached for both filesystem failures (e.code, e.g. ENOENT) and malformed
    // JSON (no code, so the SyntaxError message is reported instead).
    errors.push(`${path}: missing or unreadable provenance sidecar (${sidecar}): ${e.code ?? e.message}`);
  }
}

if (errors.length) {
  console.error('[provenance] validation failed:');
  for (const err of errors) console.error(' ' + err);
  // Set exitCode instead of calling process.exit(): exit() terminates immediately
  // and can drop pending stderr writes when the stream is an asynchronous pipe.
  // Nothing else is scheduled, so the process ends on its own with status 1.
  process.exitCode = 1;
} else {
  console.log(`[provenance] all ${assetCount} assets carry valid provenance.`);
}
+57
View File
@@ -0,0 +1,57 @@
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import os from 'node:os';
const exec = promisify(execFile);
const SCRIPT = 'scripts/validate-assets.mjs';

/** Fields read from the rejection of a promisified `execFile` call. */
interface ExecFailure {
  code?: number | string | null;
  stdout?: string;
  stderr?: string;
}

/**
 * Runs the validator script with the same Node binary that is running the tests.
 *
 * ASSETS_DIR is always controlled by the caller: any value inherited from the
 * surrounding environment is removed first, so the "real tree" case cannot be
 * silently redirected by a stray ASSETS_DIR in the developer's or CI's shell.
 *
 * @param assetsDir Directory to validate; omit to use the script's default.
 * @returns The promisified `execFile` result (rejects on non-zero exit).
 */
function runValidator(assetsDir?: string) {
  const env = { ...process.env };
  delete env.ASSETS_DIR;
  if (assetsDir !== undefined) env.ASSETS_DIR = assetsDir;
  // process.execPath avoids depending on whichever `node` happens to be on PATH.
  return exec(process.execPath, [SCRIPT], { env });
}

describe('PIPE-03 / AEST-09: asset provenance gate', () => {
  it('exits 0 against the real /assets/ tree (refused sample excluded)', async () => {
    const result = await runValidator();
    expect(result.stdout).toMatch(/all \d+ assets carry valid provenance/);
  });

  describe('with an isolated tmpdir fixture missing provenance', () => {
    let tmpDir: string | undefined;

    beforeAll(async () => {
      // Per-test-run unique tmpdir under os.tmpdir() — isolated from /assets/,
      // no risk of polluting the real tree even if the runner is killed mid-test.
      tmpDir = await mkdtemp(join(os.tmpdir(), 'tlg-provenance-test-'));
      const fixtureFile = join(tmpDir, 'orphan.png');
      // Tiny 1x1 PNG with no sidecar
      const png = Buffer.from(
        '89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4890000000d49444154789c63600100000005000146cd9c5d0000000049454e44ae426082',
        'hex',
      );
      await writeFile(fixtureFile, png);
    });

    afterAll(async () => {
      // Skip cleanup when mkdtemp itself failed and there is nothing to remove.
      if (tmpDir !== undefined) {
        await rm(tmpDir, { recursive: true, force: true });
      }
    });

    it('exits non-zero with a clear error message when ASSETS_DIR points at the fixture', async () => {
      // Run the validator against the isolated tmpdir; the script reads ASSETS_DIR
      // from process.env, so the orphan.png is the only file under inspection.
      let exitCode: number | string = 0;
      let combinedOutput = '';
      try {
        await runValidator(tmpDir);
      } catch (err: unknown) {
        // A non-zero exit rejects with an error object carrying code/stdout/stderr;
        // anything that is not an object is treated as having none of them.
        const failure = (typeof err === 'object' && err !== null ? err : {}) as ExecFailure;
        exitCode = failure.code ?? -1;
        combinedOutput = (failure.stdout ?? '') + (failure.stderr ?? '');
      }
      expect(exitCode).toBe(1);
      expect(combinedOutput).toMatch(/validation failed/);
      expect(combinedOutput).toMatch(/orphan\.png/);
      expect(combinedOutput).toMatch(/missing.*provenance sidecar/i);
    });
  });
});
+1 -1
View File
@@ -11,7 +11,7 @@ import { defineConfig } from 'vitest/config';
export default defineConfig({ export default defineConfig({
test: { test: {
environment: 'happy-dom', environment: 'happy-dom',
include: ['src/**/*.test.ts', 'src/**/*.test.tsx', 'scripts/**/*.test.mjs'], include: ['src/**/*.test.ts', 'src/**/*.test.tsx', 'scripts/**/*.test.mjs', 'scripts/**/*.test.ts'],
passWithNoTests: false, passWithNoTests: false,
globals: false, globals: false,
}, },