Add game-simulation package with multi-run balance testing, fix stalled-pipeline trap

Adds a full simulation harness (the game-simulation package) with greedy and random strategies, 36-metric diagnostics, multi-run orchestration via child processes, and a statistical interpreter. Includes 2.3x engine performance optimizations (research bonus caching, per-DC dirty tracking, reduced allocations in the tick pipeline, single-pass loops). Fixes a critical balance bug in which training pipelines stalled on insufficient VRAM would permanently block training slots: the engine never re-checked stalled pipelines, and the greedy strategy did not pre-check VRAM requirements. This caused 20-25% of seeds to get stuck in the Scale-up era. Together, the three fixes (engine un-stalling, strategy VRAM pre-check, stalled-pipeline cancellation) bring the pass rate from 75% to 100% across 20 random seeds.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-26 06:11:26 -04:00
parent 283c7c7932
commit 102e05c8ba
51 changed files with 4294 additions and 132 deletions
@@ -0,0 +1,383 @@
+import { readFileSync } from 'node:fs';
+import { writeFileSync } from 'node:fs';
+
+// Command-line arguments with the node executable and script path removed.
+const args = process.argv.slice(2);
+
+/**
+ * Looks up the value that follows `--<name>` on the command line.
+ *
+ * The token after the flag is used as its value unless it is missing, empty,
+ * or itself looks like another `--option`; in those cases `defaultValue` is
+ * returned. Refusing to consume a following option means that
+ * `--summary --out report.txt` is treated as "summary not supplied" rather
+ * than as a summary file literally named `--out`.
+ *
+ * @param name - Option name without the leading `--`.
+ * @param defaultValue - Value returned when the option is absent or has no value.
+ * @returns The option's value, or `defaultValue`.
+ */
+function getArg(name: string, defaultValue: string): string {
+  const idx = args.indexOf(`--${name}`);
+  if (idx === -1) return defaultValue;
+  const value = args[idx + 1];
+  if (!value || value.startsWith('--')) return defaultValue;
+  return value;
+}
+
+// Resolve the CLI options up front; an empty string means "not supplied".
+const summaryPath = getArg('summary', '');
+const outPath = getArg('out', '');
+
+// The summary CSV is the only required input: without it, print usage and exit with status 1.
+if (summaryPath === '') {
+  console.error('Usage: interpret --summary <path-to-multirun-summary.csv> [--out <path>]');
+  process.exit(1);
+}
+
+/**
+ * One data row of the multi-run summary CSV, i.e. the end-of-run snapshot of a
+ * single simulation run.
+ *
+ * Plain numeric fields are read from the CSV column of the same name; an empty
+ * cell is stored as 0.
+ */
+interface SummaryRow {
+  runId: number;
+  seed: number;
+  // True when the CSV `passed` column equals 1.
+  passed: boolean;
+  // Wall-clock duration in milliseconds (the report divides by 1000 to print seconds).
+  wallTimeMs: number;
+  finalEra: string;
+  finalMoney: number;
+  // Labelled "Final Revenue/tick" in the report.
+  finalRevenue: number;
+  finalTotalRevenue: number;
+  finalCapability: number;
+  finalReputation: number;
+  finalSubscribers: number;
+  // Labelled "API Developers" in the report.
+  finalDevelopers: number;
+  finalHeadcount: number;
+  finalResearchCount: number;
+  finalModelsDeployed: number;
+  revenueStreamDiversity: number;
+  // Collected from every CSV column named `featureUtilization_<category>`, keyed by `<category>`.
+  // The report prints these as percentages.
+  featureUtilization: Record<string, number>;
+  // Value of the `interconnection_overall` column.
+  interconnectionOverall: number;
+  // Collected from every other `interconnection_<link>` column, keyed by `<link>`.
+  // The report draws these on a 10-cell gauge.
+  interconnections: Record<string, number>;
+  // Era transition columns: null when the CSV cell is empty. The report passes the
+  // values to formatDuration, i.e. treats them as tick counts — presumably the tick
+  // at which the era was entered; confirm against the CSV writer.
+  eraTransition_scaleup: number | null;
+  eraTransition_bigtech: number | null;
+  eraTransition_agi: number | null;
+  // The report counts a run as at-risk / erroneous when these are greater than 0.
+  bankruptcyRisks: number;
+  sanityErrors: number;
+  // Failure descriptions joined with '; ' (the report splits on that separator);
+  // empty string when the cell is empty.
+  failureReasons: string;
+}
+
+/**
+ * Parses the multi-run summary CSV into one SummaryRow per data line.
+ *
+ * The first line is the header; columns are located by header name, so column
+ * order does not matter and missing columns read as empty. Columns named
+ * `featureUtilization_<category>` and `interconnection_<link>` (other than
+ * `interconnection_overall`) are gathered into the row's `featureUtilization`
+ * and `interconnections` maps; non-numeric cells in those columns become 0.
+ *
+ * Both LF and CRLF line endings are accepted, the header is parsed with the
+ * same quote-aware parser as the data lines, and blank lines are skipped.
+ *
+ * @param content - Full text of the summary CSV.
+ * @returns The parsed rows, or an empty array when there is no data line.
+ */
+function parseSummaryCsv(content: string): SummaryRow[] {
+  // Splitting on /\r?\n/ keeps a trailing '\r' from sticking to the last header
+  // and the last value of every line when the file uses Windows line endings.
+  const lines = content.trim().split(/\r?\n/);
+  if (lines.length < 2) return [];
+
+  const fuPrefix = 'featureUtilization_';
+  const icPrefix = 'interconnection_';
+
+  // Resolve every column position once instead of rescanning the header per field.
+  const headers = parseCSVLine(lines[0]);
+  const columnIndex = new Map<string, number>();
+  const fuColumns: Array<[string, number]> = [];
+  const icColumns: Array<[string, number]> = [];
+  headers.forEach((header, index) => {
+    // First occurrence wins for duplicated header names.
+    if (!columnIndex.has(header)) columnIndex.set(header, index);
+    if (header.startsWith(fuPrefix)) {
+      fuColumns.push([header.slice(fuPrefix.length), index]);
+    } else if (header.startsWith(icPrefix) && header !== 'interconnection_overall') {
+      icColumns.push([header.slice(icPrefix.length), index]);
+    }
+  });
+
+  const rows: SummaryRow[] = [];
+
+  for (let i = 1; i < lines.length; i++) {
+    // A blank line carries no run; without this it would become an all-zero row.
+    if (lines[i].trim() === '') continue;
+
+    const values = parseCSVLine(lines[i]);
+    const get = (name: string): string => {
+      const idx = columnIndex.get(name);
+      return idx === undefined ? '' : values[idx] ?? '';
+    };
+    const num = (name: string): number => { const v = get(name); return v === '' ? 0 : Number(v); };
+    // Era transition cells are empty when the era was never reached.
+    const numOrNull = (name: string): number | null => (get(name) ? num(name) : null);
+
+    const fuCategories: Record<string, number> = {};
+    for (const [category, idx] of fuColumns) {
+      fuCategories[category] = Number(values[idx]) || 0;
+    }
+    const icLinks: Record<string, number> = {};
+    for (const [link, idx] of icColumns) {
+      icLinks[link] = Number(values[idx]) || 0;
+    }
+
+    rows.push({
+      runId: num('runId'),
+      seed: num('seed'),
+      passed: num('passed') === 1,
+      wallTimeMs: num('wallTimeMs'),
+      finalEra: get('finalEra'),
+      finalMoney: num('finalMoney'),
+      finalRevenue: num('finalRevenue'),
+      finalTotalRevenue: num('finalTotalRevenue'),
+      finalCapability: num('finalCapability'),
+      finalReputation: num('finalReputation'),
+      finalSubscribers: num('finalSubscribers'),
+      finalDevelopers: num('finalDevelopers'),
+      finalHeadcount: num('finalHeadcount'),
+      finalResearchCount: num('finalResearchCount'),
+      finalModelsDeployed: num('finalModelsDeployed'),
+      revenueStreamDiversity: num('revenueStreamDiversity'),
+      featureUtilization: fuCategories,
+      interconnectionOverall: num('interconnection_overall'),
+      interconnections: icLinks,
+      eraTransition_scaleup: numOrNull('eraTransition_scaleup'),
+      eraTransition_bigtech: numOrNull('eraTransition_bigtech'),
+      eraTransition_agi: numOrNull('eraTransition_agi'),
+      bankruptcyRisks: num('bankruptcyRisks'),
+      sanityErrors: num('sanityErrors'),
+      // Drop a stray leading/trailing double quote that survived CSV unquoting.
+      failureReasons: get('failureReasons').replace(/^"|"$/g, ''),
+    });
+  }
+  return rows;
+}
+
+/**
+ * Splits one CSV record into its fields.
+ *
+ * Commas separate fields except inside double-quoted sections. A double quote
+ * toggles quoting wherever it appears, the quote characters themselves are not
+ * part of the field, and a doubled quote (`""`) inside a quoted section yields
+ * one literal quote. An unterminated quoted section runs to the end of the line.
+ *
+ * @param line - A single line of CSV text (no line terminator handling is done).
+ * @returns The field values in order; always at least one element.
+ */
+function parseCSVLine(line: string): string[] {
+  const fields: string[] = [];
+  let field = '';
+  let quoted = false;
+  let pos = 0;
+
+  while (pos < line.length) {
+    const ch = line[pos];
+    pos++; // `pos` now points at the character after `ch`
+
+    if (quoted) {
+      if (ch !== '"') {
+        field += ch;
+      } else if (line[pos] === '"') {
+        // Doubled quote inside a quoted section: emit one quote, skip the second.
+        field += '"';
+        pos++;
+      } else {
+        quoted = false;
+      }
+      continue;
+    }
+
+    switch (ch) {
+      case '"':
+        quoted = true;
+        break;
+      case ',':
+        fields.push(field);
+        field = '';
+        break;
+      default:
+        field += ch;
+    }
+  }
+
+  // The last field has no trailing comma to flush it.
+  fields.push(field);
+  return fields;
+}
+
+/** Descriptive statistics of a numeric sample, as produced by computeStats. */
+interface Stats {
+  // Arithmetic mean.
+  mean: number;
+  median: number;
+  // Population standard deviation (variance is divided by n, not n - 1).
+  stddev: number;
+  min: number;
+  max: number;
+  // 5th and 95th percentile of the sample.
+  p5: number;
+  p95: number;
+  // Coefficient of variation: stddev / |mean|, or 0 when the mean is 0.
+  cv: number;
+}
+
+/**
+ * Computes descriptive statistics for a numeric sample.
+ *
+ * The input array is not modified. An empty sample yields all zeros. The
+ * standard deviation is the population form (variance divided by n).
+ *
+ * p5 and p95 are taken at fractional rank `(n - 1) * q` with linear
+ * interpolation between the two neighbouring order statistics, so both tails
+ * are treated symmetrically. (Indexing at `floor(n * q)` instead makes p95
+ * equal to the maximum for every sample of 20 or fewer values while p5 is
+ * already the second-smallest value at n = 20.)
+ *
+ * @param values - Sample values; may be empty.
+ * @returns mean, median, stddev, min, max, p5, p95 and coefficient of variation.
+ */
+function computeStats(values: number[]): Stats {
+  if (values.length === 0) return { mean: 0, median: 0, stddev: 0, min: 0, max: 0, p5: 0, p95: 0, cv: 0 };
+  const sorted = [...values].sort((a, b) => a - b);
+  const n = sorted.length;
+
+  // Quantile by linear interpolation between closest ranks.
+  const quantile = (q: number): number => {
+    const rank = (n - 1) * q;
+    const lower = Math.floor(rank);
+    const fraction = rank - lower;
+    // Exact rank: return the order statistic itself (also avoids Infinity - Infinity).
+    if (fraction === 0) return sorted[lower];
+    const upper = Math.min(lower + 1, n - 1);
+    return sorted[lower] + (sorted[upper] - sorted[lower]) * fraction;
+  };
+
+  const mean = sorted.reduce((a, b) => a + b, 0) / n;
+  const median = n % 2 === 0 ? (sorted[n / 2 - 1] + sorted[n / 2]) / 2 : sorted[Math.floor(n / 2)];
+  const variance = sorted.reduce((sum, v) => sum + (v - mean) ** 2, 0) / n;
+  const stddev = Math.sqrt(variance);
+  const min = sorted[0];
+  const max = sorted[n - 1];
+  const p5 = quantile(0.05);
+  const p95 = quantile(0.95);
+  // A zero mean would divide by zero; report no variation instead.
+  const cv = mean !== 0 ? stddev / Math.abs(mean) : 0;
+  return { mean, median, stddev, min, max, p5, p95, cv };
+}
+
+/**
+ * Formats a number compactly with a K / M / B suffix.
+ *
+ * The suffix is chosen from the absolute value (so negatives scale too), and
+ * values below one thousand are printed without a suffix.
+ *
+ * @param n - Number to format.
+ * @param decimals - Digits after the decimal point (default 1).
+ * @returns The formatted string, e.g. `1.2K`, `2.5M`, `-3.0B`, `999.0`.
+ */
+function fmtNum(n: number, decimals = 1): string {
+  const magnitude = Math.abs(n);
+  // Largest scale first so the first match is the right suffix.
+  const scales: Array<[number, string]> = [
+    [1e9, 'B'],
+    [1e6, 'M'],
+    [1e3, 'K'],
+  ];
+  for (const [divisor, suffix] of scales) {
+    if (magnitude >= divisor) {
+      return `${(n / divisor).toFixed(decimals)}${suffix}`;
+    }
+  }
+  return n.toFixed(decimals);
+}
+
+/**
+ * Right-pads `s` with spaces to at least `w` characters; longer strings are
+ * returned unchanged (never truncated).
+ */
+function pad(s: string, w: number): string {
+  const padded = s.padEnd(w, ' ');
+  return padded;
+}
+
+/**
+ * Formats a tick count as a short duration string, treating 60 ticks as one
+ * minute and 60 minutes as one hour.
+ *
+ * Partial minutes are dropped. Under an hour the result is `<m>m`; otherwise
+ * `<h>h <m>m`, or just `<h>h` when there are no leftover minutes.
+ *
+ * @param ticks - Number of ticks.
+ * @returns The formatted duration, e.g. `0m`, `59m`, `1h`, `2h 2m`.
+ */
+function formatDuration(ticks: number): string {
+  const wholeMinutes = Math.floor(ticks / 60);
+  if (wholeMinutes < 60) {
+    return `${wholeMinutes}m`;
+  }
+
+  const h = Math.floor(wholeMinutes / 60);
+  const leftover = wholeMinutes % 60;
+  if (leftover > 0) {
+    return `${h}h ${leftover}m`;
+  }
+  return `${h}h`;
+}
+
+/**
+ * Renders one aligned report line for a metric's statistics.
+ *
+ * The label is padded to 22 columns and mean / median / stddev to 10 columns
+ * each; range, p5, p95 and CV follow unpadded. A ` [HIGH VARIANCE]` marker is
+ * appended when the coefficient of variation exceeds 0.3.
+ *
+ * @param label - Metric name shown at the start of the line.
+ * @param s - Statistics to print.
+ * @param formatter - Formats each value; defaults to fmtNum with its default precision.
+ * @returns The formatted line (two-space indent, no trailing newline).
+ */
+function statsLine(label: string, s: Stats, formatter: (n: number) => string = n => fmtNum(n)): string {
+  // Fixed-width cell for the three columns that are aligned across lines.
+  const cell = (value: number): string => pad(formatter(value), 10);
+
+  const segments = [
+    `  ${pad(label, 22)}`,
+    `mean=${cell(s.mean)}`,
+    `median=${cell(s.median)}`,
+    `stddev=${cell(s.stddev)}`,
+    `range=[${formatter(s.min)}, ${formatter(s.max)}]`,
+    `p5=${formatter(s.p5)}`,
+    `p95=${formatter(s.p95)}`,
+    `CV=${s.cv.toFixed(2)}`,
+  ];
+  const marker = s.cv > 0.3 ? ' [HIGH VARIANCE]' : '';
+  return segments.join(' ') + marker;
+}
+
+/**
+ * Renders a fixed-width text gauge made of `#` (filled) and `-` (empty) cells.
+ *
+ * `filled` is rounded to the nearest integer and clamped to [0, width], so an
+ * out-of-range value draws an empty or full bar instead of passing a negative
+ * count to String.prototype.repeat (which throws a RangeError).
+ */
+function renderBar(filled: number, width: number): string {
+  const rounded = Math.round(filled);
+  const cells = Number.isNaN(rounded) ? 0 : Math.min(width, Math.max(0, rounded));
+  return '#'.repeat(cells) + '-'.repeat(width - cells);
+}
+
+/** Formats `count / total` as a whole-number percentage string (without the `%` sign). */
+function pctOf(count: number, total: number): string {
+  return ((count / total) * 100).toFixed(0);
+}
+
+/**
+ * Builds the plain-text multi-run interpretation report.
+ *
+ * Sections, in order: run overview, key metric statistics, era transition
+ * timing, feature utilization, system interconnections, failure analysis and
+ * recommendations. Feature and interconnection keys are taken from the first
+ * row. Flags are raised at fixed thresholds: CV > 0.3 for key metrics,
+ * CV > 0.25 for era timing, mean utilization < 50, interconnection mean of 0
+ * (dead) or < 3 (weak), and interconnection stddev > 3 (inconsistent).
+ *
+ * @param rows - Parsed summary rows, one per run. An empty array produces a
+ *   short report that only states that there are no runs.
+ * @returns The report text, lines joined with `\n`.
+ */
+function generateReport(rows: SummaryRow[]): string {
+  const lines: string[] = [];
+  const n = rows.length;
+
+  lines.push('=== Multi-Run Interpretation Report ===');
+  lines.push('');
+
+  // With no runs every ratio below would be 0/0; say so instead of printing NaN
+  // percentages and a vacuous "all runs passed" recommendation.
+  if (n === 0) {
+    lines.push('1. RUN OVERVIEW');
+    lines.push('   Total runs: 0');
+    lines.push('   No runs to analyze.');
+    lines.push('');
+    return lines.join('\n');
+  }
+
+  // 1. Run Overview
+  const passCount = rows.filter(r => r.passed).length;
+  const totalWallTime = rows.reduce((s, r) => s + r.wallTimeMs, 0);
+  lines.push('1. RUN OVERVIEW');
+  lines.push(`   Total runs: ${n}`);
+  lines.push(`   Pass rate: ${passCount}/${n} (${pctOf(passCount, n)}%)`);
+  lines.push(`   Total wall time: ${(totalWallTime / 1000).toFixed(0)}s (avg ${(totalWallTime / n / 1000).toFixed(1)}s per run)`);
+
+  const failedRuns = rows.filter(r => !r.passed);
+  if (failedRuns.length > 0) {
+    lines.push(`   Failed seeds: ${failedRuns.map(r => r.seed).join(', ')}`);
+  }
+  lines.push('');
+
+  // 2. Statistical Summaries
+  lines.push('2. KEY METRICS');
+  const metricDefs: Array<{ label: string; getter: (r: SummaryRow) => number; fmt?: (n: number) => string }> = [
+    { label: 'Final Money', getter: r => r.finalMoney, fmt: n => `$${fmtNum(n)}` },
+    { label: 'Final Revenue/tick', getter: r => r.finalRevenue, fmt: n => `$${fmtNum(n)}` },
+    { label: 'Total Revenue', getter: r => r.finalTotalRevenue, fmt: n => `$${fmtNum(n)}` },
+    { label: 'Capability', getter: r => r.finalCapability, fmt: n => n.toFixed(1) },
+    { label: 'Reputation', getter: r => r.finalReputation, fmt: n => n.toFixed(1) },
+    { label: 'Subscribers', getter: r => r.finalSubscribers, fmt: n => fmtNum(n, 0) },
+    { label: 'API Developers', getter: r => r.finalDevelopers, fmt: n => fmtNum(n, 0) },
+    { label: 'Headcount', getter: r => r.finalHeadcount, fmt: n => String(Math.round(n)) },
+    { label: 'Research Count', getter: r => r.finalResearchCount, fmt: n => String(Math.round(n)) },
+    { label: 'Models Deployed', getter: r => r.finalModelsDeployed, fmt: n => String(Math.round(n)) },
+    { label: 'Revenue Streams', getter: r => r.revenueStreamDiversity, fmt: n => String(Math.round(n)) },
+  ];
+
+  // Keep the CV next to the label so the recommendations section does not have
+  // to look the metric up and recompute its statistics.
+  const highVarianceMetrics: Array<{ label: string; cv: number }> = [];
+  for (const def of metricDefs) {
+    const s = computeStats(rows.map(def.getter));
+    lines.push(statsLine(def.label, s, def.fmt));
+    if (s.cv > 0.3) highVarianceMetrics.push({ label: def.label, cv: s.cv });
+  }
+  lines.push('');
+
+  // 3. Era Transition Timing
+  lines.push('3. ERA TRANSITION TIMING');
+  const eraTransitions: Array<{ label: string; getter: (r: SummaryRow) => number | null }> = [
+    { label: 'Startup → Scale-up', getter: r => r.eraTransition_scaleup },
+    { label: 'Scale-up → Big Tech', getter: r => r.eraTransition_bigtech },
+    { label: 'Big Tech → AGI', getter: r => r.eraTransition_agi },
+  ];
+
+  const inconsistentEras: string[] = [];
+  for (const et of eraTransitions) {
+    // Only runs that actually reached the era contribute to its timing stats.
+    const values = rows.map(et.getter).filter((v): v is number => v !== null);
+    const reached = values.length;
+    if (reached === 0) {
+      lines.push(`  ${pad(et.label, 24)} never reached`);
+      continue;
+    }
+    const s = computeStats(values);
+    const cvFlag = s.cv > 0.25 ? ' [INCONSISTENT]' : '';
+    if (s.cv > 0.25) inconsistentEras.push(et.label);
+    lines.push(`  ${pad(et.label, 24)} ${reached}/${n} reached | mean=${formatDuration(s.mean).padStart(6)} median=${formatDuration(s.median).padStart(6)} stddev=${Math.round(s.stddev).toString().padStart(5)}t range=[${formatDuration(s.min)}, ${formatDuration(s.max)}] CV=${s.cv.toFixed(2)}${cvFlag}`);
+  }
+  lines.push('');
+
+  // 4. Feature Utilization Consistency
+  lines.push('4. FEATURE UTILIZATION');
+  const fuCategories = Object.keys(rows[0]?.featureUtilization ?? {});
+  const consistentlyLow: string[] = [];
+  for (const cat of fuCategories) {
+    const values = rows.map(r => r.featureUtilization[cat] ?? 0);
+    const s = computeStats(values);
+    // 20 cells, one per 5 percentage points.
+    const bar = renderBar(s.mean / 5, 20);
+    const flag = s.mean < 50 ? ' [LOW]' : '';
+    if (s.mean < 50) consistentlyLow.push(cat);
+    lines.push(`  ${pad(cat, 16)} [${bar}] mean=${s.mean.toFixed(0)}% stddev=${s.stddev.toFixed(1)}${flag}`);
+  }
+  lines.push('');
+
+  // 5. System Interconnections
+  lines.push('5. SYSTEM INTERCONNECTIONS');
+  const icKeys = Object.keys(rows[0]?.interconnections ?? {});
+  const weakLinks: string[] = [];
+  const deadLinks: string[] = [];
+  const inconsistentLinks: string[] = [];
+
+  {
+    const overallValues = rows.map(r => r.interconnectionOverall);
+    const overallStats = computeStats(overallValues);
+    lines.push(`  Overall score: mean=${overallStats.mean.toFixed(1)} stddev=${overallStats.stddev.toFixed(1)} range=[${overallStats.min.toFixed(1)}, ${overallStats.max.toFixed(1)}]`);
+  }
+
+  for (const key of icKeys) {
+    const values = rows.map(r => r.interconnections[key] ?? 0);
+    const s = computeStats(values);
+    // "source_target" keys read as "source → target"; camelCase words are spaced out.
+    const label = key.replace(/_/g, ' → ').replace(/([a-z])([A-Z])/g, '$1 $2');
+    // 10 cells, one per score point.
+    const bar = renderBar(s.mean, 10);
+    let flag = '';
+    if (s.mean === 0) { flag = ' [DEAD]'; deadLinks.push(label); }
+    else if (s.mean < 3) { flag = ' [WEAK]'; weakLinks.push(label); }
+    if (s.stddev > 3) { flag += ' [INCONSISTENT]'; inconsistentLinks.push(label); }
+    lines.push(`  ${pad(label, 30)} [${bar}] mean=${s.mean.toFixed(1)} stddev=${s.stddev.toFixed(1)} min=${s.min}${flag}`);
+  }
+  lines.push('');
+
+  // 6. Failure Analysis
+  lines.push('6. FAILURE ANALYSIS');
+  const failureFreq: Record<string, number> = {};
+  for (const r of rows) {
+    if (!r.failureReasons) continue;
+    // Count each distinct reason once per run, with tick numbers masked so the
+    // same failure at different ticks groups together.
+    const seen = new Set<string>();
+    for (const reason of r.failureReasons.split('; ').filter(Boolean)) {
+      const normalized = reason.replace(/tick \d+/g, 'tick N').replace(/\d+ ticks/g, 'N ticks');
+      seen.add(normalized);
+    }
+    for (const normalized of seen) {
+      failureFreq[normalized] = (failureFreq[normalized] ?? 0) + 1;
+    }
+  }
+  const sortedFailures = Object.entries(failureFreq).sort((a, b) => b[1] - a[1]);
+  if (sortedFailures.length === 0) {
+    lines.push('  No failures detected across all runs.');
+  } else {
+    // Only the ten most frequent reasons are listed.
+    for (const [reason, count] of sortedFailures.slice(0, 10)) {
+      lines.push(`  ${pctOf(count, n).padStart(3)}% (${count}/${n}) ${reason}`);
+    }
+  }
+
+  const bankruptcyRuns = rows.filter(r => r.bankruptcyRisks > 0).length;
+  if (bankruptcyRuns > 0) {
+    lines.push(`  Bankruptcy risk: ${bankruptcyRuns}/${n} runs (${pctOf(bankruptcyRuns, n)}%)`);
+  }
+  const sanityFailRuns = rows.filter(r => r.sanityErrors > 0).length;
+  if (sanityFailRuns > 0) {
+    lines.push(`  Sanity errors: ${sanityFailRuns}/${n} runs (${pctOf(sanityFailRuns, n)}%)`);
+  }
+  lines.push('');
+
+  // 7. Actionable Recommendations
+  lines.push('7. RECOMMENDATIONS');
+  const recs: string[] = [];
+
+  if (passCount / n < 0.8) {
+    const topFailure = sortedFailures[0];
+    if (topFailure) {
+      recs.push(`Balance is unstable — "${topFailure[0]}" occurs in ${pctOf(topFailure[1], n)}% of runs. This is the top priority fix.`);
+    }
+  }
+
+  for (const cat of consistentlyLow) {
+    recs.push(`Feature category "${cat}" has <50% utilization on average — review whether ${cat} features are reachable and worthwhile for the strategy.`);
+  }
+
+  for (const link of deadLinks) {
+    recs.push(`"${link}" has no measurable effect in any run — investment in the source doesn't translate to improvement in the target.`);
+  }
+  for (const link of weakLinks) {
+    recs.push(`"${link}" is consistently weak (mean <3/10) — the connection exists but is too faint to drive strategy.`);
+  }
+
+  for (const metric of highVarianceMetrics) {
+    recs.push(`"${metric.label}" is highly seed-dependent (CV=${metric.cv.toFixed(2)}) — outcome is more luck than strategy. Consider tighter guardrails.`);
+  }
+
+  for (const era of inconsistentEras) {
+    recs.push(`"${era}" transition timing is inconsistent (CV>0.25) — suggests a fragile threshold crossing that depends on RNG luck.`);
+  }
+
+  if (passCount === n && recs.length === 0) {
+    recs.push('All runs passed with consistent results. Balance looks stable across seeds.');
+  }
+
+  for (let i = 0; i < recs.length; i++) {
+    lines.push(`  ${i + 1}. ${recs[i]}`);
+  }
+  lines.push('');
+
+  return lines.join('\n');
+}
+
+// Entry point: load the summary CSV, build the report, then write it to
+// --out when given or print it to stdout otherwise.
+const rows = parseSummaryCsv(readFileSync(summaryPath, 'utf-8'));
+
+// A header-only or empty CSV is an error (exit status 1), not an empty report.
+if (rows.length === 0) {
+  console.error('No data found in summary CSV.');
+  process.exit(1);
+}
+
+const report = generateReport(rows);
+
+if (!outPath) {
+  console.log(report);
+} else {
+  writeFileSync(outPath, report);
+  console.log(`Report written to ${outPath}`);
+}