Overhaul model system with multi-stage training, variants, benchmarks, and eval

Replace the single-stage training and flat capability score with a more realistic AI development pipeline: pre-training governed by Chinchilla scaling laws, SFT with specializations, alignment with safety/capability trade-offs (RLHF, DPO, Constitutional AI), model families with distillation, fine-tuning, and quantization variants, a named benchmark suite run through compute-costing eval jobs, and segment-specific market quality. This commit covers phases 1-6 of the model rework plan: new types, engine rewrite, save migration, training events/risk system, concurrent training, variant creation, benchmark evaluation with leaderboard, and market integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-25 07:36:34 -04:00
parent fc1f371c8c
commit 4c1c0e9ff2
24 changed files with 2157 additions and 357 deletions
@@ -23,6 +23,66 @@ export const CAPABILITY_FORMULA = {
  efficiencyWeight: 0.1,
 };

+export const PRETRAINING_BASE_TICKS = 180; // NOTE(review): presumably the baseline pre-training duration in game ticks — confirm against the engine.
+export const SFT_TIME_FRACTION = 0.10; // NOTE(review): the SFT time/compute fractions look relative to the pre-training totals — confirm.
+export const SFT_COMPUTE_FRACTION = 0.06;
+export const ALIGNMENT_TIME_FRACTION = 0.08; // NOTE(review): presumably uses the same base as the SFT fractions — confirm.
+export const ALIGNMENT_COMPUTE_FRACTION = 0.04;
+export const CHINCHILLA_OPTIMAL_RATIO = 20; // NOTE(review): presumably training tokens per parameter; compare PreTrainingConfig.chinchillaRatio.
+
+export const MAX_CONCURRENT_TRAINING: Record<string, number> = { // Concurrent-training limit looked up by a string key.
+  startup: 1, scaleup: 2, bigtech: 4, agi: 8, // NOTE(review): keys look like Era names — confirm they match the Era type; an unknown key yields undefined.
+};
+
+export const DISTILLATION_COMPUTE_FRACTION = 0.15; // NOTE(review): variant-job fractions are presumably relative to the base model's training compute/time — confirm.
+export const DISTILLATION_TIME_FRACTION = 0.20;
+export const DISTILLATION_BASE_RETENTION = 0.70; // NOTE(review): presumably the share of base capability a distilled variant keeps; see ModelVariant.distillationRetention.
+export const FINETUNE_COMPUTE_FRACTION = 0.03;
+export const FINETUNE_TIME_FRACTION = 0.08;
+export const QUANTIZATION_TICKS = 8; // Named as a tick count rather than a fraction, unlike the distillation and fine-tune constants above.
+
+export const MOE_CAPABILITY_MULTIPLIER = 1.15; // NOTE(review): presumably applied when ModelArchitecture.type is 'moe' — confirm.
+export const MOE_SPEED_MULTIPLIER = 1.3;
+export const PARAMETER_OPTIONS = [1, 3, 7, 13, 30, 70, 130, 300, 700, 1400]; // NOTE(review): units are not stated here; likely billions of parameters — confirm.
+export const CONTEXT_WINDOW_OPTIONS = [4, 8, 32, 128, 256, 1024]; // NOTE(review): units are not stated here; likely thousands of tokens — confirm.
+
+export const EVENT_BASE_PROBABILITY = 0.001; // NOTE(review): presumably a per-tick chance of a training event — confirm.
+export const LOSS_SPIKE_DELAY_MIN = 5; // Each MIN/MAX pair below bounds a range; NOTE(review): likely ticks, see TrainingEvent.impact.ticksDelayed.
+export const LOSS_SPIKE_DELAY_MAX = 20;
+export const INSTABILITY_PROGRESS_LOSS_MIN = 0.05; // NOTE(review): likely a fraction of progress, see TrainingEvent.impact.progressLost.
+export const INSTABILITY_PROGRESS_LOSS_MAX = 0.15;
+export const BREAKTHROUGH_CAPABILITY_BONUS_MIN = 5; // NOTE(review): likely capability points, see TrainingEvent.impact.capabilityBonus.
+export const BREAKTHROUGH_CAPABILITY_BONUS_MAX = 15;
+
+export const EMERGENT_CAPABILITY_THRESHOLDS: Record<number, string> = { // Maps a numeric threshold to a capability identifier string.
+  10: 'basic-reasoning', // At runtime these keys are strings; Object.keys/entries return them as strings in ascending integer order.
+  50: 'chain-of-thought',
+  100: 'tool-use',
+  500: 'long-horizon-planning', // NOTE(review): threshold units are not stated; possibly the same scale as PARAMETER_OPTIONS — confirm.
+};
+
+export const QUANTIZATION_CONFIGS: Record<string, { qualityRetention: number; speedMultiplier: number; costMultiplier: number }> = { // Per-level trade-offs; the keys mirror the QuantizationLevel union.
+  fp16: { qualityRetention: 1.00, speedMultiplier: 1.0, costMultiplier: 1.0 }, // Identity entry: all three factors are 1.
+  int8: { qualityRetention: 0.97, speedMultiplier: 1.8, costMultiplier: 0.55 }, // Going down the table, retention and cost fall while speed rises.
+  int4: { qualityRetention: 0.90, speedMultiplier: 3.0, costMultiplier: 0.30 },
+  int2: { qualityRetention: 0.75, speedMultiplier: 5.0, costMultiplier: 0.15 },
+};
+
+export const ALIGNMENT_METHODS: Record<string, { safetyGain: number; capabilityLoss: number; baseRefusal: number; requiredResearch: string }> = { // Per-method parameters; the keys mirror the AlignmentMethod union.
+  rlhf: { safetyGain: 25, capabilityLoss: 5, baseRefusal: 0.10, requiredResearch: 'alignment-research' }, // NOTE(review): requiredResearch values look like research ids — confirm they exist in the research data.
+  dpo: { safetyGain: 20, capabilityLoss: 2, baseRefusal: 0.05, requiredResearch: 'interpretability' }, // Lowest capabilityLoss and baseRefusal of the three.
+  constitutional: { safetyGain: 30, capabilityLoss: 4, baseRefusal: 0.14, requiredResearch: 'constitutional-ai' }, // Highest safetyGain and baseRefusal of the three.
+};
+
+export const SFT_SPECIALIZATION_BONUSES: Record<string, Record<string, number>> = { // Outer keys mirror SFTSpecialization; inner keys mirror the ModelCapabilities fields.
+  general:      { reasoning: 5, coding: 5, creative: 5, math: 5, knowledge: 5, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
+  code:         { reasoning: 0, coding: 15, creative: -3, math: 8, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 }, // Negative entries are penalties.
+  math:         { reasoning: 8, coding: 0, creative: -3, math: 15, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
+  creative:     { reasoning: 0, coding: -3, creative: 15, math: 0, knowledge: 5, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
+  multilingual: { reasoning: 0, coding: 0, creative: 0, math: 0, knowledge: 10, multimodal: 0, agents: 0, speed: -5, contextUtilization: 0 },
+  'tool-use':   { reasoning: 0, coding: 8, creative: 0, math: 0, knowledge: 0, multimodal: 0, agents: 15, speed: -5, contextUtilization: 0 }, // multimodal and contextUtilization are 0 in every row.
+};
+
 export const CONSUMER_BASE_GROWTH = 0.002;
 export const CONSUMER_QUALITY_GROWTH_MULTIPLIER = 0.01;
 export const CONSUMER_PRICE_ELASTICITY = -0.5;
@@ -58,4 +58,4 @@ export const INITIAL_SETTINGS: GameSettings = {
  sfxVolume: 0.7,
 };

-export const SAVE_VERSION = 5;
+export const SAVE_VERSION = 6; // NOTE(review): ModelsState no longer has trainedModels/activeTraining — confirm a migration from version 5 saves exists.
@@ -1,51 +1,239 @@
-export interface ModelsState {
-  trainedModels: TrainedModel[];
-  activeTraining: TrainingJob | null;
-  productLines: ProductLine[];
+import type { Era } from './gameState';
+import type { DataDomain } from './data';
+
+export type ArchitectureType = 'dense' | 'moe'; // Also used by DistillationConfig.targetArchitecture.
+
+export interface ModelArchitecture { // Structural description shared by TrainingPipeline, BaseModel and ModelVariant.
+  type: ArchitectureType;
+  totalParameters: number; // NOTE(review): units are not stated; likely the same scale as PARAMETER_OPTIONS — confirm.
+  activeParameters: number; // NOTE(review): presumably equals totalParameters for 'dense' and is smaller for 'moe' — confirm.
+  expertCount?: number; // Optional; NOTE(review): presumably only set when type is 'moe'.
+  expertTopK?: number;
+  contextWindow: number;
+  vocabularySize: number;
 }

-export interface TrainedModel {
+export type DataMixAllocation = Record<DataDomain, number>; // One numeric weight per DataDomain; DEFAULT_DATA_MIX uses fractions that sum to 1.
+
+export type TrainingStage = 'pretraining' | 'sft' | 'alignment'; // Same names as the keys of TrainingPipeline.stages.
+export type TrainingJobStatus = 'active' | 'paused' | 'stalled' | 'completed' | 'failed'; // Wider than the 'active' | 'completed' status used by VariantCreationJob and EvalJob.
+
+export interface TrainingPipeline { // One training run; ModelsState.activeTrainingPipelines holds a list of these.
  id: string;
-  name: string;
-  generation: number;
-  parameterCount: number;
-  trainingDataSize: number;
-  capabilities: ModelCapabilities;
-  safetyScore: number;
-  benchmarkScore: number;
-  tuning: ModelTuning;
-  isDeployed: boolean;
-  trainedAtTick: number;
+  familyId: string; // NOTE(review): presumably a ModelFamily.id — confirm.
+  modelName: string;
+  architecture: ModelArchitecture;
+  dataMix: DataMixAllocation;
+  currentStage: TrainingStage; // One of the three keys of `stages` below.
+  stages: {
+    pretraining: PreTrainingConfig; // Always present, unlike the two nullable stages.
+    sft: SFTConfig | null; // NOTE(review): null presumably means the stage is not part of this run — confirm.
+    alignment: AlignmentConfig | null;
+  };
+  status: TrainingJobStatus;
+  allocatedComputeFraction: number; // NOTE(review): presumably a 0-1 share of available compute — confirm.
+  events: TrainingEvent[];
+  startedAtTick: number;
+}
+
+export interface PreTrainingConfig { // State of the pre-training stage (TrainingPipeline.stages.pretraining).
+  targetTokens: number;
+  processedTokens: number;
+  computeAllocated: number;
+  progressTicks: number; // The progressTicks/totalTicks pair also appears on the other stage configs, VariantCreationJob and EvalJob.
+  totalTicks: number;
+  lossValue: number;
+  chinchillaRatio: number; // NOTE(review): presumably tokens per parameter, judged against CHINCHILLA_OPTIMAL_RATIO — confirm.
+  isComplete: boolean;
+}
+
+export type SFTSpecialization = 'general' | 'code' | 'math' | 'creative' | 'multilingual' | 'tool-use'; // Also used by ModelVariant.finetuneSpecialization and FineTuneConfig.specialization.
+
+export interface SFTConfig { // State of the nullable SFT stage (TrainingPipeline.stages.sft).
+  specializations: SFTSpecialization[]; // An array, so more than one specialization can be listed.
+  progressTicks: number;
+  totalTicks: number;
+  isComplete: boolean;
+}
+
+export type AlignmentMethod = 'rlhf' | 'dpo' | 'constitutional'; // Same names as the keys of ALIGNMENT_METHODS.
+
+export interface AlignmentConfig { // State of the nullable alignment stage (TrainingPipeline.stages.alignment).
+  method: AlignmentMethod;
+  safetyWeight: number; // NOTE(review): the range of the two weights, and whether they must sum to 1, is not stated here — confirm.
+  helpfulnessWeight: number;
+  progressTicks: number;
+  totalTicks: number;
+  isComplete: boolean;
+}
+
+export type TrainingEventType = // Kinds of event a TrainingEvent can carry in its `type` field.
+  | 'loss_spike'
+  | 'instability'
+  | 'breakthrough'
+  | 'emergent_capability'
+  | 'data_contamination'
+  | 'hardware_failure';
+
+export interface TrainingEvent { // One entry in TrainingPipeline.events.
+  id: string;
+  type: TrainingEventType;
+  tick: number;
+  severity: 'minor' | 'moderate' | 'major';
+  description: string;
+  resolved: boolean;
+  impact: { // Every field is optional; NOTE(review): presumably each event type fills only the ones relevant to it.
+    ticksDelayed?: number;
+    progressLost?: number;
+    capabilityBonus?: number;
+    capabilityDomain?: keyof ModelCapabilities; // Names a ModelCapabilities field.
+  };
 }

 export interface ModelCapabilities {
  reasoning: number;
  coding: number;
  creative: number;
+  math: number;
+  knowledge: number;
  multimodal: number;
  agents: number;
  speed: number;
+  contextUtilization: number;
 }

-export interface ModelTuning {
-  preset: TuningPreset;
-  verbosity?: number;
-  safetyLevel?: number;
-  creativity?: number;
-  speedQuality?: number;
-  refusalRate?: number;
+export interface SafetyProfile { // Safety metrics carried by BaseModel and ModelVariant.
+  overallSafety: number;
+  refusalRate: number; // NOTE(review): scale is not stated; ALIGNMENT_METHODS.baseRefusal uses small fractions (0.05-0.14) — confirm.
+  harmAvoidance: number;
+  instructionFollowing: number;
+  honesty: number;
 }

-export type TuningPreset = 'helpful-safe' | 'max-capability' | 'enterprise' | 'creative';
+export interface BaseModel { // A trained model; listed in ModelsState.baseModels and referenced via baseModelId by ModelFamily, ModelVariant and VariantCreationJob.
+  id: string;
+  familyId: string;
+  name: string;
+  architecture: ModelArchitecture;
+  dataMix: DataMixAllocation;
+  capabilities: ModelCapabilities;
+  safetyProfile: SafetyProfile;
+  rawCapability: number; // NOTE(review): presumably a single aggregate score alongside the per-domain capabilities — confirm.
+  isDeployed: boolean;
+  trainedAtTick: number;
+  trainingCostTotal: number;
+  trainingStagesCompleted: TrainingStage[]; // A list, so a model need not have completed all three stages.
+}

-export interface TrainingJob {
-  modelName: string;
+export type VariantType = 'distilled' | 'fine-tuned' | 'quantized'; // Adjective forms; VariantJobType uses the noun forms ('distillation', ...).
+export type QuantizationLevel = 'fp16' | 'int8' | 'int4' | 'int2'; // Same names as the keys of QUANTIZATION_CONFIGS.
+
+export interface ModelVariant { // A model derived from a BaseModel; stored in ModelFamily.variants.
+  id: string;
+  familyId: string;
+  baseModelId: string;
+  name: string;
+  variantType: VariantType;
+  architecture: ModelArchitecture;
+  capabilities: ModelCapabilities;
+  safetyProfile: SafetyProfile;
+  isDeployed: boolean;
+  createdAtTick: number;
+  quantization?: QuantizationLevel; // The three optional fields: NOTE(review): presumably each is set only for the matching variantType.
+  distillationRetention?: number;
+  finetuneSpecialization?: SFTSpecialization;
+  costMultiplier: number; // Same field names as the entries of QUANTIZATION_CONFIGS.
+  speedMultiplier: number;
+}
+
+export interface ModelFamily { // Groups a base model (by id) with its variants (embedded objects).
+  id: string;
+  name: string;
  generation: number;
-  allocatedCompute: number;
-  allocatedDataTokens: number;
+  baseModelId: string | null; // null is allowed; NOTE(review): presumably while the family's base model is still training — confirm.
+  variants: ModelVariant[];
+  createdAtTick: number;
+}
+
+export type VariantJobType = 'distillation' | 'fine-tuning' | 'quantization'; // Noun forms; VariantType uses the adjective forms ('distilled', ...).
+
+export interface VariantCreationJob { // An entry in ModelsState.variantJobs.
+  id: string;
+  familyId: string;
+  baseModelId: string;
+  jobType: VariantJobType;
+  config: DistillationConfig | FineTuneConfig | QuantizationConfig; // The union has no tag of its own; NOTE(review): consumers presumably narrow it via jobType — confirm.
  progressTicks: number;
  totalTicks: number;
-  estimatedCapability: number;
+  allocatedComputeFraction: number;
+  status: 'active' | 'completed'; // Narrower than TrainingJobStatus: no paused, stalled or failed state.
+}
+
+export interface DistillationConfig { // One of the three VariantCreationJob.config shapes; each carries a variantName.
+  targetParameters: number;
+  targetArchitecture: ArchitectureType;
+  variantName: string;
+}
+
+export interface FineTuneConfig { // VariantCreationJob.config shape carrying a single specialization.
+  specialization: SFTSpecialization;
+  datasetIds: string[]; // NOTE(review): presumably ids of datasets defined elsewhere — confirm.
+  variantName: string;
+}
+
+export interface QuantizationConfig { // VariantCreationJob.config shape carrying a quantization level.
+  level: QuantizationLevel;
+  variantName: string;
+}
+
+export type BenchmarkCategory = 'reasoning' | 'coding' | 'math' | 'knowledge' | 'safety' | 'chat' | 'multimodal' | 'agents'; // 'safety' and 'chat' have no same-named ModelCapabilities field.
+
+export interface BenchmarkDefinition { // Static description of one benchmark.
+  id: string;
+  name: string;
+  category: BenchmarkCategory;
+  description: string;
+  primaryCapability: keyof ModelCapabilities; // Must name a ModelCapabilities field, even for the 'safety' and 'chat' categories.
+  secondaryCapability?: keyof ModelCapabilities;
+  computeCost: number;
+  ticksToRun: number;
+  unlockedAtEra: Era;
+  marketRelevance: { // One weight per market segment; NOTE(review): scale is not stated here — confirm.
+    consumer: number;
+    enterprise: number;
+    developer: number;
+    research: number;
+  };
+}
+
+export interface BenchmarkResult { // One score; stored in EvalJob.results and ModelsState.benchmarkResults.
+  benchmarkId: string; // NOTE(review): presumably a BenchmarkDefinition.id — confirm.
+  modelId: string; // NOTE(review): not stated whether this may be a BaseModel id, a ModelVariant id, or either — confirm.
+  score: number;
+  ranAtTick: number;
+  rank?: number;
+}
+
+export interface EvalJob { // An entry in ModelsState.evalJobs; can cover several benchmarks for one model.
+  id: string;
+  modelId: string;
+  benchmarkIds: string[];
+  progressTicks: number;
+  totalTicks: number;
+  computeAllocated: number;
+  status: 'active' | 'completed'; // Same two-state status as VariantCreationJob.
+  results: BenchmarkResult[];
+}
+
+export type ProductLineType = 'text-api' | 'chat-product' | 'chat-free' | 'chat-enterprise' | 'code-api' | 'image' | 'agents-api'; // NOTE(review): the previous 'code' and 'agents' values are gone (now 'code-api'/'agents-api') — confirm stored saves are remapped.
+
+export interface ProductPricing { // Pricing settings attached to a ProductLine through its `pricing` field.
+  inputTokenPrice: number;
+  outputTokenPrice: number;
+  thinkingTokenBudget: number;
+  cachingEnabled: boolean;
+  subscriptionPrice: number;
+  freeTokenAllowance: number;
 }

 export interface ProductLine {
@@ -57,20 +245,38 @@ export interface ProductLine {
  pricing: ProductPricing;
 }

-export type ProductLineType = 'text-api' | 'chat-product' | 'image' | 'code' | 'agents';
-
-export interface ProductPricing {
-  inputTokenPrice: number;
-  outputTokenPrice: number;
-  thinkingTokenBudget: number;
-  cachingEnabled: boolean;
-  subscriptionPrice: number;
-  freeTokenAllowance: number;
+export interface ModelsState { // Root state of the model system; INITIAL_MODELS is its starting value.
+  families: ModelFamily[];
+  baseModels: BaseModel[];
+  activeTrainingPipelines: TrainingPipeline[]; // A list, where the previous shape had a single nullable activeTraining job.
+  variantJobs: VariantCreationJob[];
+  evalJobs: EvalJob[];
+  benchmarkResults: BenchmarkResult[]; // Each EvalJob also carries its own `results` array.
+  productLines: ProductLine[];
+  bestDeployedModelScore: number; // NOTE(review): presumably cached aggregates over deployed models; both start at 0 in INITIAL_MODELS.
+  bestDeployedSafetyScore: number;
 }

+export const DEFAULT_DATA_MIX: DataMixAllocation = { // Default allocation across ten data domains; the fractions sum to 1.00.
+  web: 0.35, // Largest share.
+  books: 0.10,
+  code: 0.15,
+  scientific: 0.10,
+  conversation: 0.10,
+  multilingual: 0.05,
+  images: 0.05,
+  video: 0.03,
+  audio: 0.02, // Smallest share.
+  synthetic: 0.05,
+};
+
 export const INITIAL_MODELS: ModelsState = {
-  trainedModels: [],
-  activeTraining: null,
+  families: [],
+  baseModels: [],
+  activeTrainingPipelines: [],
+  variantJobs: [],
+  evalJobs: [],
+  benchmarkResults: [],
  productLines: [
    {
      id: 'text-api',
@@ -103,4 +309,6 @@ export const INITIAL_MODELS: ModelsState = {
      },
    },
  ],
+  bestDeployedModelScore: 0,
+  bestDeployedSafetyScore: 0,
 };