From 5aa943636827cf4110f0f4d6f390aa93f33c578d Mon Sep 17 00:00:00 2001 From: josh Date: Sun, 26 Apr 2026 20:55:49 -0400 Subject: [PATCH] Expand multirun reporting: health summary, era durations, serving diagnostics, cash-flow detail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Propagate per-era duration/bottleneck, serving utilization, cash-flow nadir/peak, and late-game revenue growth through the worker→CSV→interpret pipeline. Add simulation health archetype classification, per-era bottleneck frequency, unused-feature frequency table, failed-run AGI gate analysis, and log-scale variance for exponential metrics. All new CSV columns parse defensively for backward compatibility with older summary files. Co-Authored-By: Claude Opus 4.6 --- packages/game-simulation/src/interpret.ts | 336 ++++++++++++++++++++-- packages/game-simulation/src/multirun.ts | 33 ++- packages/game-simulation/src/worker.ts | 37 +++ 3 files changed, 386 insertions(+), 20 deletions(-) diff --git a/packages/game-simulation/src/interpret.ts b/packages/game-simulation/src/interpret.ts index f25aa8b..86f541a 100644 --- a/packages/game-simulation/src/interpret.ts +++ b/packages/game-simulation/src/interpret.ts @@ -16,6 +16,13 @@ if (!summaryPath) { process.exit(1); } +// --- AGI gate thresholds (must match ERA_THRESHOLDS in gameBalance.ts) --- +const AGI_GATES = { + capability: 93, + revenue: 1_000_000_000, + reputation: 80, +}; + interface SummaryRow { runId: number; seed: number; @@ -42,6 +49,24 @@ interface SummaryRow { bankruptcyRisks: number; sanityErrors: number; failureReasons: string; + // New pipeline fields (may be absent in old CSVs) + duration_startup: number | null; + duration_scaleup: number | null; + duration_bigtech: number | null; + duration_agi: number | null; + bottleneck_scaleup: string; + bottleneck_bigtech: string; + bottleneck_agi: string; + servingMeanUtil: number | null; + servingPctOverloaded: number | null; + servingPctUnderloaded: number | null; + servingPeakUtil: number | null; + cashMinAmount: number | null; + cashMinTick: number | null; + cashPeakAmount: number | null; + cashPeakTick: number | null; + lateGameRevenueGrowthRate: number | null; + unusedFeatures: string; } function parseSummaryCsv(content: string): SummaryRow[] { @@ -50,10 +75,20 @@ function parseSummaryCsv(content: string): SummaryRow[] { const headers = lines[0].split(','); const rows: SummaryRow[] = []; + const hasCol = (name: string): boolean => headers.indexOf(name) >= 0; + for (let i = 1; i < lines.length; i++) { const values = parseCSVLine(lines[i]); - const get = (name: string): string => values[headers.indexOf(name)] ?? ''; + const get = (name: string): string => { + const idx = headers.indexOf(name); + return idx >= 0 ? (values[idx] ?? '') : ''; + }; const num = (name: string): number => { const v = get(name); return v === '' ? 0 : Number(v); }; + const numOrNull = (name: string): number | null => { + if (!hasCol(name)) return null; + const v = get(name); + return v === '' ? null : Number(v); + }; const fuCategories: Record = {}; const icLinks: Record = {}; @@ -93,6 +128,23 @@ function parseSummaryCsv(content: string): SummaryRow[] { bankruptcyRisks: num('bankruptcyRisks'), sanityErrors: num('sanityErrors'), failureReasons: get('failureReasons').replace(/^"|"$/g, ''), + duration_startup: numOrNull('duration_startup'), + duration_scaleup: numOrNull('duration_scaleup'), + duration_bigtech: numOrNull('duration_bigtech'), + duration_agi: numOrNull('duration_agi'), + bottleneck_scaleup: get('bottleneck_scaleup'), + bottleneck_bigtech: get('bottleneck_bigtech'), + bottleneck_agi: get('bottleneck_agi'), + servingMeanUtil: numOrNull('servingMeanUtil'), + servingPctOverloaded: numOrNull('servingPctOverloaded'), + servingPctUnderloaded: numOrNull('servingPctUnderloaded'), + servingPeakUtil: numOrNull('servingPeakUtil'), + cashMinAmount: numOrNull('cashMinAmount'), + cashMinTick: numOrNull('cashMinTick'), + cashPeakAmount: numOrNull('cashPeakAmount'), + cashPeakTick: numOrNull('cashPeakTick'), + lateGameRevenueGrowthRate: numOrNull('lateGameRevenueGrowthRate'), + unusedFeatures: get('unusedFeatures').replace(/^"|"$/g, ''), }); } return rows; @@ -135,12 +187,15 @@ interface Stats { min: number; max: number; p5: number; + p25: number; + p75: number; p95: number; cv: number; + logCv: number; } function computeStats(values: number[]): Stats { - if (values.length === 0) return { mean: 0, median: 0, stddev: 0, min: 0, max: 0, p5: 0, p95: 0, cv: 0 }; + if (values.length === 0) return { mean: 0, median: 0, stddev: 0, min: 0, max: 0, p5: 0, p25: 0, p75: 0, p95: 0, cv: 0, logCv: 0 }; const sorted = [...values].sort((a, b) => a - b); const n = sorted.length; const mean = sorted.reduce((a, b) => a + b, 0) / n; @@ -150,9 +205,21 @@ function computeStats(values: number[]): Stats { const min = sorted[0]; const max = sorted[n - 1]; const p5 = sorted[Math.floor(n * 0.05)] ?? min; + const p25 = sorted[Math.floor(n * 0.25)] ?? min; + const p75 = sorted[Math.min(Math.floor(n * 0.75), n - 1)] ?? max; const p95 = sorted[Math.min(Math.floor(n * 0.95), n - 1)] ?? max; const cv = mean !== 0 ? stddev / Math.abs(mean) : 0; - return { mean, median, stddev, min, max, p5, p95, cv }; + + let logCv = 0; + const positiveValues = values.filter(v => v > 0); + if (positiveValues.length > 1) { + const logValues = positiveValues.map(v => Math.log(v)); + const logMean = logValues.reduce((a, b) => a + b, 0) / logValues.length; + const logVariance = logValues.reduce((sum, v) => sum + (v - logMean) ** 2, 0) / logValues.length; + logCv = logMean !== 0 ? Math.sqrt(logVariance) / Math.abs(logMean) : 0; + } + + return { mean, median, stddev, min, max, p5, p25, p75, p95, cv, logCv }; } function fmtNum(n: number, decimals = 1): string { @@ -174,33 +241,87 @@ function formatDuration(ticks: number): string { return mins > 0 ? `${hours}h ${mins}m` : `${hours}h`; } +function pct(n: number): string { + return `${(n * 100).toFixed(0)}%`; +} + +const MONETARY_LABELS = new Set(['Final Money', 'Final Revenue/tick', 'Total Revenue']); + function statsLine(label: string, s: Stats, formatter: (n: number) => string = n => fmtNum(n)): string { - const cvFlag = s.cv > 0.3 ? ' [HIGH VARIANCE]' : ''; - return ` ${pad(label, 22)} mean=${pad(formatter(s.mean), 10)} median=${pad(formatter(s.median), 10)} stddev=${pad(formatter(s.stddev), 10)} range=[${formatter(s.min)}, ${formatter(s.max)}] p5=${formatter(s.p5)} p95=${formatter(s.p95)} CV=${s.cv.toFixed(2)}${cvFlag}`; + const isMonetary = MONETARY_LABELS.has(label); + const varianceMeasure = isMonetary ? s.logCv : s.cv; + const varianceThreshold = isMonetary ? 0.15 : 0.3; + const varianceLabel = isMonetary ? 'logCV' : 'CV'; + const cvFlag = varianceMeasure > varianceThreshold ? ' [HIGH VARIANCE]' : ''; + return ` ${pad(label, 22)} mean=${pad(formatter(s.mean), 10)} median=${pad(formatter(s.median), 10)} stddev=${pad(formatter(s.stddev), 10)} range=[${formatter(s.min)}, ${formatter(s.max)}] p5=${formatter(s.p5)} p95=${formatter(s.p95)} ${varianceLabel}=${varianceMeasure.toFixed(2)}${cvFlag}`; +} + +function classifyRun(row: SummaryRow, totalTicks: number): string { + if (!row.passed) { + if (row.eraTransition_bigtech === null) return 'early_failure'; + return 'late_failure'; + } + const agiTick = row.eraTransition_agi; + if (agiTick !== null) { + if (agiTick < totalTicks * 0.5) return 'fast_pass'; + if (agiTick < totalTicks * 0.75) return 'clean_pass'; + return 'late_bloomer'; + } + return 'clean_pass'; } function generateReport(rows: SummaryRow[]): string { const lines: string[] = []; const n = rows.length; + const passCount = rows.filter(r => r.passed).length; + const failedRuns = rows.filter(r => !r.passed); + + // Estimate totalTicks from the max eraTransition or walltime pattern + const maxTransitionTick = Math.max( + ...rows.map(r => r.eraTransition_agi ?? r.eraTransition_bigtech ?? r.eraTransition_scaleup ?? 0), + ); + const estimatedTotalTicks = maxTransitionTick > 20000 ? 28800 : maxTransitionTick > 10000 ? 14400 : 7200; lines.push('=== Multi-Run Interpretation Report ==='); lines.push(''); + // 0. Simulation Health Summary + const archetypes: Record = {}; + for (const r of rows) { + const type = classifyRun(r, estimatedTotalTicks); + archetypes[type] = (archetypes[type] ?? 0) + 1; + } + + lines.push('0. SIMULATION HEALTH'); + const parts: string[] = []; + if (archetypes.fast_pass) parts.push(`${archetypes.fast_pass} fast`); + if (archetypes.clean_pass) parts.push(`${archetypes.clean_pass} clean`); + if (archetypes.late_bloomer) parts.push(`${archetypes.late_bloomer} late bloomers`); + const failCount = (archetypes.early_failure ?? 0) + (archetypes.late_failure ?? 0); + if (failCount > 0) parts.push(`${failCount} failed`); + lines.push(` ${n} runs: ${parts.join(', ')}`); + + if (failCount > 0) { + const failDetail: string[] = []; + if (archetypes.early_failure) failDetail.push(`${archetypes.early_failure} stuck before Big Tech`); + if (archetypes.late_failure) failDetail.push(`${archetypes.late_failure} stuck in Big Tech`); + lines.push(` Failures: ${failDetail.join(', ')}`); + } + lines.push(''); + // 1. Run Overview - const passCount = rows.filter(r => r.passed).length; const totalWallTime = rows.reduce((s, r) => s + r.wallTimeMs, 0); lines.push('1. RUN OVERVIEW'); lines.push(` Total runs: ${n}`); lines.push(` Pass rate: ${passCount}/${n} (${((passCount / n) * 100).toFixed(0)}%)`); lines.push(` Total wall time: ${(totalWallTime / 1000).toFixed(0)}s (avg ${(totalWallTime / n / 1000).toFixed(1)}s per run)`); - const failedRuns = rows.filter(r => !r.passed); if (failedRuns.length > 0) { lines.push(` Failed seeds: ${failedRuns.map(r => r.seed).join(', ')}`); } lines.push(''); - // 2. Statistical Summaries + // 2. Key Metrics lines.push('2. KEY METRICS'); const metricDefs: Array<{ label: string; getter: (r: SummaryRow) => number; fmt?: (n: number) => string }> = [ { label: 'Final Money', getter: r => r.finalMoney, fmt: n => `$${fmtNum(n)}` }, @@ -216,12 +337,23 @@ function generateReport(rows: SummaryRow[]): string { { label: 'Revenue Streams', getter: r => r.revenueStreamDiversity, fmt: n => String(Math.round(n)) }, ]; + // Add late-game revenue growth if available + const hasLateGrowth = rows.some(r => r.lateGameRevenueGrowthRate !== null); + if (hasLateGrowth) { + metricDefs.push({ + label: 'Late Rev Growth/tick', + getter: r => r.lateGameRevenueGrowthRate ?? 0, + fmt: n => pct(n), + }); + } + const highVarianceMetrics: string[] = []; for (const def of metricDefs) { const values = rows.map(def.getter); const s = computeStats(values); lines.push(statsLine(def.label, s, def.fmt)); - if (s.cv > 0.3) highVarianceMetrics.push(def.label); + const isMonetary = MONETARY_LABELS.has(def.label); + if (isMonetary ? s.logCv > 0.15 : s.cv > 0.3) highVarianceMetrics.push(def.label); } lines.push(''); @@ -244,11 +376,45 @@ function generateReport(rows: SummaryRow[]): string { const s = computeStats(values); const cvFlag = s.cv > 0.25 ? ' [INCONSISTENT]' : ''; if (s.cv > 0.25) inconsistentEras.push(et.label); - lines.push(` ${pad(et.label, 24)} ${reached}/${n} reached | mean=${formatDuration(s.mean).padStart(6)} median=${formatDuration(s.median).padStart(6)} stddev=${Math.round(s.stddev).toString().padStart(5)}t range=[${formatDuration(s.min)}, ${formatDuration(s.max)}] CV=${s.cv.toFixed(2)}${cvFlag}`); + lines.push(` ${pad(et.label, 24)} ${reached}/${n} reached | mean=${formatDuration(s.mean).padStart(6)} median=${formatDuration(s.median).padStart(6)} p25=${formatDuration(s.p25).padStart(6)} p75=${formatDuration(s.p75).padStart(6)} range=[${formatDuration(s.min)}, ${formatDuration(s.max)}] CV=${s.cv.toFixed(2)}${cvFlag}`); } lines.push(''); - // 4. Feature Utilization Consistency + // 3B. Per-Era Duration + const hasEraDurations = rows.some(r => r.duration_startup !== null); + if (hasEraDurations) { + lines.push('3B. PER-ERA DURATION'); + const eraDurations: Array<{ label: string; getter: (r: SummaryRow) => number | null; bottleneckGetter?: (r: SummaryRow) => string }> = [ + { label: 'Startup', getter: r => r.duration_startup }, + { label: 'Scale-up', getter: r => r.duration_scaleup, bottleneckGetter: r => r.bottleneck_scaleup }, + { label: 'Big Tech', getter: r => r.duration_bigtech, bottleneckGetter: r => r.bottleneck_bigtech }, + { label: 'AGI', getter: r => r.duration_agi, bottleneckGetter: r => r.bottleneck_agi }, + ]; + + for (const ed of eraDurations) { + const values = rows.map(ed.getter).filter((v): v is number => v !== null && v > 0); + if (values.length === 0) { + lines.push(` ${pad(ed.label, 14)} no data`); + continue; + } + const s = computeStats(values); + lines.push(` ${pad(ed.label, 14)} ${values.length}/${n} runs | mean=${formatDuration(s.mean).padStart(6)} median=${formatDuration(s.median).padStart(6)} p25=${formatDuration(s.p25).padStart(6)} p75=${formatDuration(s.p75).padStart(6)} range=[${formatDuration(s.min)}, ${formatDuration(s.max)}]`); + + if (ed.bottleneckGetter) { + const bottlenecks = rows.map(ed.bottleneckGetter).filter(Boolean); + if (bottlenecks.length > 0) { + const freq: Record = {}; + for (const b of bottlenecks) freq[b] = (freq[b] ?? 0) + 1; + const sorted = Object.entries(freq).sort((a, b) => b[1] - a[1]); + const parts = sorted.map(([gate, count]) => `${gate}: ${((count / bottlenecks.length) * 100).toFixed(0)}%`); + lines.push(` ${pad('', 14)} exit bottleneck: ${parts.join(', ')}`); + } + } + } + lines.push(''); + } + + // 4. Feature Utilization lines.push('4. FEATURE UTILIZATION'); const fuCategories = Object.keys(rows[0]?.featureUtilization ?? {}); const consistentlyLow: string[] = []; @@ -260,6 +426,25 @@ function generateReport(rows: SummaryRow[]): string { if (s.mean < 50) consistentlyLow.push(cat); lines.push(` ${pad(cat, 16)} [${bar}] mean=${s.mean.toFixed(0)}% stddev=${s.stddev.toFixed(1)}${flag}`); } + + // Unused features frequency table + const hasUnusedFeatures = rows.some(r => r.unusedFeatures.length > 0); + if (hasUnusedFeatures) { + const skipFreq: Record = {}; + for (const r of rows) { + for (const feat of r.unusedFeatures.split(';').filter(Boolean)) { + skipFreq[feat] = (skipFreq[feat] ?? 0) + 1; + } + } + const sortedSkips = Object.entries(skipFreq).sort((a, b) => b[1] - a[1]); + if (sortedSkips.length > 0) { + lines.push(''); + lines.push(' Most-skipped features:'); + for (const [feat, count] of sortedSkips.slice(0, 15)) { + lines.push(` ${String(count).padStart(3)}/${n} (${((count / n) * 100).toFixed(0).padStart(2)}%) ${feat}`); + } + } + } lines.push(''); // 5. System Interconnections @@ -267,7 +452,6 @@ function generateReport(rows: SummaryRow[]): string { const icKeys = Object.keys(rows[0]?.interconnections ?? {}); const weakLinks: string[] = []; const deadLinks: string[] = []; - const inconsistentLinks: string[] = []; { const overallValues = rows.map(r => r.interconnectionOverall); @@ -283,11 +467,46 @@ function generateReport(rows: SummaryRow[]): string { let flag = ''; if (s.mean === 0) { flag = ' [DEAD]'; deadLinks.push(label); } else if (s.mean < 3) { flag = ' [WEAK]'; weakLinks.push(label); } - if (s.stddev > 3) { flag += ' [INCONSISTENT]'; inconsistentLinks.push(label); } + if (s.stddev > 3) { flag += ' [INCONSISTENT]'; } lines.push(` ${pad(label, 30)} [${bar}] mean=${s.mean.toFixed(1)} stddev=${s.stddev.toFixed(1)} min=${s.min}${flag}`); } lines.push(''); + // 5B. Serving Infrastructure + const hasServing = rows.some(r => r.servingMeanUtil !== null); + let servingOverloaded = false; + let servingUnderloaded = false; + if (hasServing) { + lines.push('5B. SERVING INFRASTRUCTURE'); + const meanUtils = rows.map(r => r.servingMeanUtil).filter((v): v is number => v !== null); + const overloaded = rows.map(r => r.servingPctOverloaded).filter((v): v is number => v !== null); + const underloaded = rows.map(r => r.servingPctUnderloaded).filter((v): v is number => v !== null); + const peaks = rows.map(r => r.servingPeakUtil).filter((v): v is number => v !== null); + + if (meanUtils.length > 0) { + const sMean = computeStats(meanUtils); + const sOver = computeStats(overloaded); + const sUnder = computeStats(underloaded); + const sPeak = computeStats(peaks); + + lines.push(` Mean utilization: median=${pct(sMean.median).padStart(4)} mean=${pct(sMean.mean).padStart(4)} range=[${pct(sMean.min)}, ${pct(sMean.max)}]`); + lines.push(` % ticks overloaded: median=${pct(sOver.median).padStart(4)} mean=${pct(sOver.mean).padStart(4)} range=[${pct(sOver.min)}, ${pct(sOver.max)}]`); + lines.push(` % ticks underused: median=${pct(sUnder.median).padStart(4)} mean=${pct(sUnder.mean).padStart(4)} range=[${pct(sUnder.min)}, ${pct(sUnder.max)}]`); + lines.push(` Peak utilization: median=${pct(sPeak.median).padStart(4)} mean=${pct(sPeak.mean).padStart(4)} range=[${pct(sPeak.min)}, ${pct(sPeak.max)}]`); + + if (sOver.median > 0.5) { + lines.push(` >> Diagnosis: Chronic overload — demand exceeds capacity ${pct(sOver.median)} of the time`); + servingOverloaded = true; + } else if (sUnder.median > 0.5) { + lines.push(` >> Diagnosis: Chronic underutilization — capacity idle ${pct(sUnder.median)} of the time`); + servingUnderloaded = true; + } else if (sOver.median > 0.2 && sUnder.median > 0.2) { + lines.push(` >> Diagnosis: Volatile — swings between overload (${pct(sOver.median)}) and underuse (${pct(sUnder.median)})`); + } + } + lines.push(''); + } + // 6. Failure Analysis lines.push('6. FAILURE ANALYSIS'); const failureFreq: Record = {}; @@ -311,17 +530,65 @@ function generateReport(rows: SummaryRow[]): string { } } - const bankruptcyRuns = rows.filter(r => r.bankruptcyRisks > 0).length; - if (bankruptcyRuns > 0) { - lines.push(` Bankruptcy risk: ${bankruptcyRuns}/${n} runs (${((bankruptcyRuns / n) * 100).toFixed(0)}%)`); + // Failed run detail with gate analysis + if (failedRuns.length > 0) { + lines.push(''); + lines.push(' Failed run detail (vs AGI gates: capability≥93, revenue≥$1B, reputation≥80):'); + const gateBlockers: Record = {}; + for (const r of failedRuns) { + const gates = [ + { name: 'capability', current: r.finalCapability, required: AGI_GATES.capability, fmt: (n: number) => n.toFixed(1) }, + { name: 'revenue', current: r.finalTotalRevenue, required: AGI_GATES.revenue, fmt: (n: number) => `$${fmtNum(n)}` }, + { name: 'reputation', current: r.finalReputation, required: AGI_GATES.reputation, fmt: (n: number) => n.toFixed(1) }, + ]; + const unmet = gates.filter(g => g.current < g.required); + const blocking = unmet.length > 0 + ? unmet.reduce((a, b) => (a.current / a.required < b.current / b.required) ? a : b) + : null; + + const gateStrs = gates.map(g => { + const pctComplete = Math.min(100, (g.current / g.required) * 100); + const marker = g.current >= g.required ? '✓' : '✗'; + return `${g.name}=${g.fmt(g.current)} (${pctComplete.toFixed(0)}%) ${marker}`; + }); + + lines.push(` seed ${r.seed}: ${gateStrs.join(' | ')}${blocking ? ` — blocked by ${blocking.name}` : ''}`); + if (blocking) gateBlockers[blocking.name] = (gateBlockers[blocking.name] ?? 0) + 1; + } + if (Object.keys(gateBlockers).length > 0) { + const sorted = Object.entries(gateBlockers).sort((a, b) => b[1] - a[1]); + lines.push(` Blocking gate frequency: ${sorted.map(([g, c]) => `${g}: ${c}/${failedRuns.length}`).join(', ')}`); + } } + + // Cash-flow nadir + const hasCashNadir = rows.some(r => r.cashMinAmount !== null); + if (hasCashNadir) { + const nadirAmounts = rows.map(r => r.cashMinAmount).filter((v): v is number => v !== null); + const nadirTicks = rows.map(r => r.cashMinTick).filter((v): v is number => v !== null); + const peakAmounts = rows.map(r => r.cashPeakAmount).filter((v): v is number => v !== null); + if (nadirAmounts.length > 0) { + lines.push(''); + const sNadir = computeStats(nadirAmounts); + const sNadirTick = computeStats(nadirTicks); + const sPeak = computeStats(peakAmounts); + lines.push(` Cash nadir: median $${fmtNum(sNadir.median)} at ${formatDuration(sNadirTick.median)} | mean $${fmtNum(sNadir.mean)} at ${formatDuration(sNadirTick.mean)}`); + lines.push(` Cash peak: median $${fmtNum(sPeak.median)} | mean $${fmtNum(sPeak.mean)}`); + } + } else { + const bankruptcyRuns = rows.filter(r => r.bankruptcyRisks > 0).length; + if (bankruptcyRuns > 0) { + lines.push(` Bankruptcy risk: ${bankruptcyRuns}/${n} runs (${((bankruptcyRuns / n) * 100).toFixed(0)}%)`); + } + } + const sanityFailRuns = rows.filter(r => r.sanityErrors > 0).length; if (sanityFailRuns > 0) { lines.push(` Sanity errors: ${sanityFailRuns}/${n} runs (${((sanityFailRuns / n) * 100).toFixed(0)}%)`); } lines.push(''); - // 7. Actionable Recommendations + // 7. Recommendations lines.push('7. RECOMMENDATIONS'); const recs: string[] = []; @@ -343,16 +610,47 @@ function generateReport(rows: SummaryRow[]): string { recs.push(`"${link}" is consistently weak (mean <3/10) — the connection exists but is too faint to drive strategy.`); } + if (servingOverloaded) { + recs.push('Serving infrastructure is chronically overloaded — demand exceeds capacity for most of the game. Consider faster compute scaling or demand throttling.'); + } + if (servingUnderloaded) { + recs.push('Serving infrastructure is chronically underutilized — compute capacity vastly exceeds demand. Consider slowing infrastructure investment or accelerating user growth.'); + } + for (const metric of highVarianceMetrics) { - const values = rows.map(metricDefs.find(d => d.label === metric)!.getter); + const def = metricDefs.find(d => d.label === metric)!; + const values = rows.map(def.getter); const s = computeStats(values); - recs.push(`"${metric}" is highly seed-dependent (CV=${s.cv.toFixed(2)}) — outcome is more luck than strategy. Consider tighter guardrails.`); + const isMonetary = MONETARY_LABELS.has(metric); + const measure = isMonetary ? `logCV=${s.logCv.toFixed(2)}` : `CV=${s.cv.toFixed(2)}`; + recs.push(`"${metric}" is highly seed-dependent (${measure}) — outcome is more luck than strategy.`); } for (const era of inconsistentEras) { recs.push(`"${era}" transition timing is inconsistent (CV>0.25) — suggests a fragile threshold crossing that depends on RNG luck.`); } + // Gate-specific recommendation if failures share a common blocker + if (failedRuns.length > 0) { + const gateBlockers: Record = {}; + for (const r of failedRuns) { + const gates = [ + { name: 'capability', current: r.finalCapability, required: AGI_GATES.capability }, + { name: 'revenue', current: r.finalTotalRevenue, required: AGI_GATES.revenue }, + { name: 'reputation', current: r.finalReputation, required: AGI_GATES.reputation }, + ]; + const unmet = gates.filter(g => g.current < g.required); + const blocking = unmet.length > 0 + ? unmet.reduce((a, b) => (a.current / a.required < b.current / b.required) ? a : b) + : null; + if (blocking) gateBlockers[blocking.name] = (gateBlockers[blocking.name] ?? 0) + 1; + } + const dominant = Object.entries(gateBlockers).sort((a, b) => b[1] - a[1])[0]; + if (dominant && dominant[1] / failedRuns.length > 0.5) { + recs.push(`${dominant[1]}/${failedRuns.length} failures blocked by ${dominant[0]} gate — this is the primary balance bottleneck for AGI transition.`); + } + } + if (passCount === n && recs.length === 0) { recs.push('All runs passed with consistent results. Balance looks stable across seeds.'); } diff --git a/packages/game-simulation/src/multirun.ts b/packages/game-simulation/src/multirun.ts index 524ceaa..a17eb6f 100644 --- a/packages/game-simulation/src/multirun.ts +++ b/packages/game-simulation/src/multirun.ts @@ -66,8 +66,20 @@ interface WorkerResult { connections: Array<{ from: string; to: string; score: number; evidence: string; diagnosis: string; events: number; eventLabel: string }>; overallScore: number; }; - cashFlow: { bankruptcyRisks: number }; + cashFlow: { + bankruptcyRisks: number; + minCash: { amount: number; tick: number }; + peakCash: { amount: number; tick: number }; + }; sanityChecks: { passed: boolean; errorCount: number }; + perEraSummary: Array<{ era: string; durationTicks: number; bottleneckAtExit: string | null }>; + serving: { + meanUtilization: number; + pctOverloaded: number; + pctUnderloaded: number; + peakUtilization: number; + }; + lateGameRevenueGrowthRate: number; metrics: SimulationMetrics[]; } @@ -119,6 +131,12 @@ function buildSummaryCsv(results: WorkerResult[]): string { ...interconnectionKeys, 'eraTransition_scaleup', 'eraTransition_bigtech', 'eraTransition_agi', 'bankruptcyRisks', 'sanityErrors', 'failureReasons', + 'duration_startup', 'duration_scaleup', 'duration_bigtech', 'duration_agi', + 'bottleneck_scaleup', 'bottleneck_bigtech', 'bottleneck_agi', + 'servingMeanUtil', 'servingPctOverloaded', 'servingPctUnderloaded', 'servingPeakUtil', + 'cashMinAmount', 'cashMinTick', 'cashPeakAmount', 'cashPeakTick', + 'lateGameRevenueGrowthRate', + 'unusedFeatures', ]; const rows = results.map(r => { @@ -137,6 +155,13 @@ function buildSummaryCsv(results: WorkerResult[]): string { const icScores = ic.connections.map(c => c.score); + const durationMap: Record = { startup: '', scaleup: '', bigtech: '', agi: '' }; + const bottleneckMap: Record = { scaleup: '', bigtech: '', agi: '' }; + for (const es of r.perEraSummary) { + durationMap[es.era] = es.durationTicks; + if (es.bottleneckAtExit) bottleneckMap[es.era] = es.bottleneckAtExit; + } + return [ r.runId, r.seed, r.passed ? 1 : 0, r.wallTimeMs, fm?.era ?? '', fm?.money ?? '', fm?.revenue ?? '', fm?.totalRevenue ?? '', @@ -155,6 +180,12 @@ function buildSummaryCsv(results: WorkerResult[]): string { r.cashFlow.bankruptcyRisks, r.sanityChecks.errorCount, `"${r.failureReasons.join('; ').replace(/"/g, '""')}"`, + durationMap.startup, durationMap.scaleup, durationMap.bigtech, durationMap.agi, + bottleneckMap.scaleup, bottleneckMap.bigtech, bottleneckMap.agi, + r.serving.meanUtilization, r.serving.pctOverloaded, r.serving.pctUnderloaded, r.serving.peakUtilization, + r.cashFlow.minCash.amount, r.cashFlow.minCash.tick, r.cashFlow.peakCash.amount, r.cashFlow.peakCash.tick, + r.lateGameRevenueGrowthRate, + `"${fu.unusedFeatures.join(';').replace(/"/g, '""')}"`, ].join(','); }); diff --git a/packages/game-simulation/src/worker.ts b/packages/game-simulation/src/worker.ts index 75f7ffb..31fb50a 100644 --- a/packages/game-simulation/src/worker.ts +++ b/packages/game-simulation/src/worker.ts @@ -31,6 +31,29 @@ const result = runSimulation({ }); const report = generateJsonReport(result, { totalTicks, decisionInterval, strategy, seed }); +// Compute serving diagnostics from metrics timeseries +const utilizationValues = result.metrics.map(m => m.inferenceUtilization); +const servingMeanUtilization = utilizationValues.length > 0 + ? utilizationValues.reduce((a, b) => a + b, 0) / utilizationValues.length : 0; +const servingPctOverloaded = utilizationValues.length > 0 + ? utilizationValues.filter(v => v > 0.95).length / utilizationValues.length : 0; +const servingPctUnderloaded = utilizationValues.length > 0 + ? utilizationValues.filter(v => v < 0.20).length / utilizationValues.length : 0; +const servingPeakUtilization = utilizationValues.length > 0 + ? Math.max(...utilizationValues) : 0; + +// Compute late-game revenue growth rate from last 10% of metrics +const totalMetrics = result.metrics.length; +const lateStartIdx = Math.floor(totalMetrics * 0.9); +const lateMetrics = result.metrics.slice(lateStartIdx); +let lateGameRevenueGrowthRate = 0; +if (lateMetrics.length >= 2) { + const first = lateMetrics[0].totalRevenue; + const last = lateMetrics[lateMetrics.length - 1].totalRevenue; + const tickSpan = lateMetrics[lateMetrics.length - 1].tick - lateMetrics[0].tick; + lateGameRevenueGrowthRate = tickSpan > 0 && first > 0 ? (last - first) / first / tickSpan : 0; +} + const output = { runId, seed, @@ -50,11 +73,25 @@ const output = { }, cashFlow: { bankruptcyRisks: report.cashFlow.bankruptcyRisks.length, + minCash: report.cashFlow.minCash, + peakCash: report.cashFlow.peakCash, }, sanityChecks: { passed: report.sanityChecks.passed, errorCount: report.sanityChecks.violations.filter(v => v.severity === 'error').length, }, + perEraSummary: report.perEraSummary.map(es => ({ + era: es.era, + durationTicks: es.durationTicks, + bottleneckAtExit: es.bottleneckAtExit, + })), + serving: { + meanUtilization: servingMeanUtilization, + pctOverloaded: servingPctOverloaded, + pctUnderloaded: servingPctUnderloaded, + peakUtilization: servingPeakUtilization, + }, + lateGameRevenueGrowthRate, metrics: result.metrics, };