diff --git a/assets/__samples__/refused/.gitkeep b/assets/__samples__/refused/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/assets/__samples__/refused/no-provenance.png b/assets/__samples__/refused/no-provenance.png
new file mode 100644
index 0000000..cee3880
Binary files /dev/null and b/assets/__samples__/refused/no-provenance.png differ
diff --git a/scripts/validate-assets.mjs b/scripts/validate-assets.mjs
new file mode 100644
index 0000000..03314c8
--- /dev/null
+++ b/scripts/validate-assets.mjs
@@ -0,0 +1,84 @@
+#!/usr/bin/env node
+// scripts/validate-assets.mjs — Phase 1 asset provenance gate (PIPE-03, AEST-08, AEST-09)
+//
+// Walks /assets/ (or process.env.ASSETS_DIR for tests) and requires every file that is
+// not itself a sidecar, a .gitkeep, or a README.md to have a sibling .provenance.json
+// that validates against ProvenanceSchema. /assets/__samples__/refused/ is excluded:
+// it intentionally lacks sidecars to prove the gate.
+//
+// Per CONTEXT D-03: minimum-viable. No curator workflow, no two-stage promotion,
+// no pre-commit hook. Sidecar + this script + CI is the entire pipeline.
+//
+// Per CONTEXT D-01: 6 required fields per CLAUDE.md provenance metadata.
+// Per RESEARCH Open Question #2: optional provenance_schema_version for Phase 5 fwd-compat.
+
+import { readdir, readFile } from 'node:fs/promises';
+import { join, basename } from 'node:path';
+import { z } from 'zod';
+
+const requiredText = z.string().min(1); // shared rule: a non-empty string
+const ProvenanceSchema = z.object({
+  model_id: requiredText,
+  checkpoint_hash: requiredText,
+  prompt: requiredText,
+  seed: z.union([z.string(), z.number()]),
+  sampler: requiredText,
+  params: z.record(z.string(), z.unknown()),
+  provenance_schema_version: z.number().int().positive().optional(),
+});
+
+const ASSETS_DIR = process.env.ASSETS_DIR ?? 'assets';
+// Refused-sample exclusion is relative to the *real* assets tree; harmless for a tmpdir ASSETS_DIR.
+// Trailing '/' pins each match to a directory boundary, so 'refused-v2/' is still validated.
+const REFUSED_PREFIXES = ['assets/__samples__/refused/', 'assets/__test_fixtures__/refused/'];
+
+// Recursively yields every non-directory path under `dir`; a missing dir yields nothing.
+async function* walk(dir) {
+  let entries;
+  try {
+    entries = await readdir(dir, { withFileTypes: true });
+  } catch (e) {
+    if (e.code === 'ENOENT') return;
+    throw e;
+  }
+  for (const entry of entries) {
+    const path = join(dir, entry.name);
+    if (entry.isDirectory()) yield* walk(path);
+    else yield path;
+  }
+}
+
+// Converts backslash separators to '/' so the prefix/suffix checks below see one form.
+const normalizePath = (p) => p.replaceAll('\\', '/');
+
+const errors = [];
+let assetCount = 0;
+
+for await (const path of walk(ASSETS_DIR)) {
+  const norm = normalizePath(path);
+  const name = basename(norm);
+  if (REFUSED_PREFIXES.some((prefix) => norm.startsWith(prefix))) continue;
+  if (norm.endsWith('.provenance.json') || name === '.gitkeep' || name === 'README.md') continue;
+  assetCount++;
+  const sidecar = path + '.provenance.json';
+  try {
+    const raw = await readFile(sidecar, 'utf8');
+    const parsed = ProvenanceSchema.safeParse(JSON.parse(raw));
+    if (!parsed.success) {
+      errors.push(`${path}: provenance schema validation failed — ${parsed.error.message}`);
+    }
+  } catch (e) {
+    // JSON.parse throws SyntaxError (no .code); readFile errors carry a .code such as ENOENT.
+    const why = e instanceof SyntaxError ? 'invalid JSON in' : 'missing or unreadable';
+    errors.push(`${path}: ${why} provenance sidecar (${sidecar}): ${e.code ?? e.message}`);
+  }
+}
+
+if (errors.length) {
+  console.error('[provenance] validation failed:');
+  for (const err of errors) console.error(' ' + err);
+  // Set exitCode instead of calling process.exit() so pending stderr writes are not cut off.
+  process.exitCode = 1;
+} else {
+  console.log(`[provenance] all ${assetCount} assets carry valid provenance.`);
+}
diff --git a/scripts/validate-assets.test.ts b/scripts/validate-assets.test.ts
new file mode 100644
index 0000000..5902f5e
--- /dev/null
+++ b/scripts/validate-assets.test.ts
@@ -0,0 +1,61 @@
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { execFile } from 'node:child_process';
+import { promisify } from 'node:util';
+import { mkdtemp, rm, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import os from 'node:os';
+
+const exec = promisify(execFile);
+const SCRIPT = 'scripts/validate-assets.mjs';
+
+/** Fields this test reads from the rejection of a promisified execFile call. */
+interface ExecFailure {
+  code?: number | string | null;
+  stdout?: string;
+  stderr?: string;
+}
+
+describe('PIPE-03 / AEST-09: asset provenance gate', () => {
+  it('exits 0 against the real /assets/ tree (refused sample excluded)', async () => {
+    const result = await exec('node', [SCRIPT]);
+    expect(result.stdout).toMatch(/all \d+ assets carry valid provenance/);
+  });
+
+  describe('with an isolated tmpdir fixture missing provenance', () => {
+    let tmpDir: string;
+
+    beforeAll(async () => {
+      // Per-test-run unique tmpdir under os.tmpdir() — isolated from /assets/,
+      // no risk of polluting the real tree even if the runner is killed mid-test.
+      tmpDir = await mkdtemp(join(os.tmpdir(), 'tlg-provenance-test-'));
+      // Tiny 1x1 PNG with no sidecar
+      const png = Buffer.from(
+        '89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4890000000d49444154789c63600100000005000146cd9c5d0000000049454e44ae426082',
+        'hex',
+      );
+      await writeFile(join(tmpDir, 'orphan.png'), png);
+    });
+
+    afterAll(async () => {
+      await rm(tmpDir, { recursive: true, force: true });
+    });
+
+    it('exits non-zero with a clear error message when ASSETS_DIR points at the fixture', async () => {
+      // Run the validator against the isolated tmpdir; the script reads ASSETS_DIR
+      // from process.env, so the orphan.png is the only file under inspection.
+      let exitCode: number | string = 0;
+      let combinedOutput = '';
+      try {
+        await exec('node', [SCRIPT], { env: { ...process.env, ASSETS_DIR: tmpDir } });
+      } catch (err: unknown) {
+        // execFile rejects with an Error carrying code/stdout/stderr on non-zero exit.
+        const failure = err as ExecFailure;
+        exitCode = failure.code ?? -1;
+        combinedOutput = (failure.stdout ?? '') + (failure.stderr ?? '');
+      }
+      expect(exitCode).toBe(1);
+      expect(combinedOutput).toMatch(/validation failed/);
+      expect(combinedOutput).toMatch(/orphan\.png/);
+      expect(combinedOutput).toMatch(/missing.*provenance sidecar/i);
+    });
+  });
+});
diff --git a/vitest.config.ts b/vitest.config.ts
index d0bff50..22b35af 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -11,7 +11,7 @@ import { defineConfig } from 'vitest/config';
 export default defineConfig({
   test: {
     environment: 'happy-dom',
-    include: ['src/**/*.test.ts', 'src/**/*.test.tsx', 'scripts/**/*.test.mjs'],
+    include: ['src/**/*.test.ts', 'src/**/*.test.tsx', 'scripts/**/*.test.mjs', 'scripts/**/*.test.ts'],
     passWithNoTests: false,
     globals: false,
   },