Overhaul model system with multi-stage training, variants, benchmarks, and eval

Replace the single-stage training and flat capability score with a more realistic AI development pipeline: pre-training with Chinchilla scaling laws, SFT with specializations, alignment with safety/capability tradeoffs (RLHF/DPO/Constitutional), model families with distillation/fine-tuning/quantization variants, a named benchmark suite whose eval jobs cost compute, and segment-specific market quality.

This covers phases 1-6 of the model rework plan: new types, engine rewrite, save migration, training events/risk system, concurrent training, variant creation, benchmark evaluation with leaderboard, and market integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-25 07:36:34 -04:00
parent fc1f371c8c
commit 4c1c0e9ff2
24 changed files with 2157 additions and 357 deletions
@@ -13,7 +13,7 @@ export const ACHIEVEMENT_DEFINITIONS: AchievementDefinition[] = [
    name: 'Hello World',
    description: 'Train your first AI model.',
    icon: 'Brain',
-    condition: { field: 'models.trainedModels.length', operator: 'gte', value: 1 },
+    condition: { field: 'models.baseModels.length', operator: 'gte', value: 1 },
  },
  {
    id: 'first-deploy',
@@ -0,0 +1,111 @@
+import type { BenchmarkDefinition } from '@ai-tycoon/shared';
+
+export const BENCHMARKS: BenchmarkDefinition[] = [ // Benchmark catalogue; each entry's marketRelevance gives its weight per market segment.
+  {
+    id: 'arc-challenge',
+    name: 'ARC Challenge',
+    category: 'reasoning',
+    description: 'Advanced reasoning and comprehension tasks requiring multi-step inference.',
+    primaryCapability: 'reasoning',
+    secondaryCapability: 'knowledge',
+    computeCost: 0.001,
+    ticksToRun: 8,
+    unlockedAtEra: 'startup',
+    marketRelevance: { consumer: 0.3, enterprise: 0.5, developer: 0.4, research: 0.8 },
+  },
+  {
+    id: 'codeforce',
+    name: 'CodeForce',
+    category: 'coding',
+    description: 'Competitive programming and software engineering benchmarks.',
+    primaryCapability: 'coding',
+    secondaryCapability: 'reasoning',
+    computeCost: 0.001,
+    ticksToRun: 8,
+    unlockedAtEra: 'startup',
+    marketRelevance: { consumer: 0.2, enterprise: 0.7, developer: 0.9, research: 0.5 },
+  },
+  {
+    id: 'mathquest',
+    name: 'MathQuest',
+    category: 'math',
+    description: 'Mathematical problem-solving from algebra to graduate-level proofs.',
+    primaryCapability: 'math',
+    secondaryCapability: 'reasoning',
+    computeCost: 0.001,
+    ticksToRun: 8,
+    unlockedAtEra: 'startup',
+    marketRelevance: { consumer: 0.1, enterprise: 0.6, developer: 0.5, research: 0.9 },
+  },
+  {
+    id: 'worldfacts',
+    name: 'WorldFacts',
+    category: 'knowledge',
+    description: 'Broad factual knowledge across science, history, culture, and current events.',
+    primaryCapability: 'knowledge',
+    secondaryCapability: 'reasoning',
+    computeCost: 0.001,
+    ticksToRun: 6,
+    unlockedAtEra: 'startup',
+    marketRelevance: { consumer: 0.5, enterprise: 0.4, developer: 0.3, research: 0.6 },
+  },
+  {
+    id: 'chatrank',
+    name: 'ChatRank',
+    category: 'chat',
+    description: 'Human preference evaluation of conversational quality, helpfulness, and creativity.',
+    primaryCapability: 'creative',
+    secondaryCapability: 'knowledge',
+    computeCost: 0.002,
+    ticksToRun: 10,
+    unlockedAtEra: 'startup',
+    marketRelevance: { consumer: 0.9, enterprise: 0.3, developer: 0.2, research: 0.2 },
+  },
+  {
+    id: 'harmguard',
+    name: 'HarmGuard',
+    category: 'safety',
+    description: 'Safety evaluation measuring harm avoidance, truthfulness, and responsible behavior.',
+    primaryCapability: 'reasoning', // NOTE(review): the only entry without a secondaryCapability, and a safety benchmark keyed to 'reasoning' — confirm this is intended
+    computeCost: 0.001,
+    ticksToRun: 8,
+    unlockedAtEra: 'startup',
+    marketRelevance: { consumer: 0.4, enterprise: 0.9, developer: 0.3, research: 0.7 },
+  },
+  {
+    id: 'visionbench',
+    name: 'VisionBench',
+    category: 'multimodal',
+    description: 'Image understanding, visual reasoning, and multimodal comprehension.',
+    primaryCapability: 'multimodal',
+    secondaryCapability: 'reasoning',
+    computeCost: 0.003,
+    ticksToRun: 12,
+    unlockedAtEra: 'scaleup',
+    marketRelevance: { consumer: 0.5, enterprise: 0.6, developer: 0.6, research: 0.7 },
+  },
+  {
+    id: 'agentarena',
+    name: 'AgentArena',
+    category: 'agents',
+    description: 'Autonomous agent tasks: tool use, multi-step planning, and environment interaction.',
+    primaryCapability: 'agents',
+    secondaryCapability: 'coding',
+    computeCost: 0.005,
+    ticksToRun: 15,
+    unlockedAtEra: 'bigtech',
+    marketRelevance: { consumer: 0.3, enterprise: 0.8, developer: 0.7, research: 0.6 },
+  },
+  {
+    id: 'frontier-eval',
+    name: 'Frontier Eval',
+    category: 'reasoning',
+    description: 'Cutting-edge capability evaluation at the frontier of AI research.',
+    primaryCapability: 'reasoning',
+    secondaryCapability: 'math',
+    computeCost: 0.01,
+    ticksToRun: 20,
+    unlockedAtEra: 'agi',
+    marketRelevance: { consumer: 0.2, enterprise: 0.5, developer: 0.5, research: 1.0 },
+  },
+];
@@ -8,3 +8,4 @@ export { TECH_TREE } from './data/techTree';
 export { INITIAL_RIVALS } from './data/competitors';
 export { KEY_HIRE_POOL } from './data/keyHires';
 export { ACHIEVEMENT_DEFINITIONS } from './data/achievements';
+export { BENCHMARKS } from './data/benchmarks';
@@ -9,7 +9,7 @@ const ERA_INDEX: Record<string, number> = { startup: 0, scaleup: 1, bigtech: 2,

 function getFieldValue(state: GameState, field: string): number {
  if (field === 'meta._eraIndex') return ERA_INDEX[state.meta.currentEra] ?? 0;
-  if (field === 'meta._deployedModelCount') return state.models.trainedModels.filter(m => m.isDeployed).length;
+  if (field === 'meta._deployedModelCount') return state.models.baseModels.filter(m => m.isDeployed).length;
  const parts = field.split('.');
  let current: unknown = state;
  for (const part of parts) {
@@ -43,7 +43,7 @@ export function processCompetitors(state: GameState): CompetitorState {

  const allCaps = [
    ...rivals.filter(r => r.status === 'active').map(r => r.estimatedCapability),
-    state.models.trainedModels.reduce((best, m) => Math.max(best, m.benchmarkScore), 0),
+    state.models.bestDeployedModelScore,
  ];
  const industryBenchmark = allCaps.length > 0 ? Math.max(...allCaps) : 0;

@@ -22,7 +22,7 @@ export function processEconomy(
  const talentExpenses = state.talent.totalSalaryPerTick;
  const dataExpenses = state.data.partnerships.reduce((sum, p) => sum + p.costPerTick, 0);

-  const bestCapability = state.models.trainedModels.reduce((best, m) => Math.max(best, m.benchmarkScore), 0);
+  const bestCapability = state.models.bestDeployedModelScore;
  const eraIdx = ['startup', 'scaleup', 'bigtech', 'agi'].indexOf(state.meta.currentEra);
  const complianceCost = bestCapability > 30 ? bestCapability * REGULATION_COMPLIANCE_PER_CAPABILITY * (1 + eraIdx * 0.5) / 100 : 0;

@@ -11,9 +11,7 @@ export function checkEraTransition(state: GameState): Era | null {
  const thresholds = ERA_THRESHOLDS[nextEra as keyof typeof ERA_THRESHOLDS];
  if (!thresholds) return null;

-  const bestModel = state.models.trainedModels.reduce(
-    (best, m) => Math.max(best, m.benchmarkScore), 0,
-  );
+  const bestModel = state.models.bestDeployedModelScore;

  if (
    state.economy.totalRevenue >= thresholds.revenue &&
@@ -35,9 +35,6 @@ export function canRaiseFunding(state: GameState): { canRaise: boolean; nextRoun
 export function computeValuation(state: GameState): number {
  const revenueMultiple = state.economy.revenuePerTick * 86400 * 365;
  const subscriberValue = state.market.consumers.totalSubscribers * 500;
-  const capabilityValue = Math.pow(
-    Math.max(...state.models.trainedModels.map(m => m.benchmarkScore), 0),
-    2,
-  ) * 1000;
+  const capabilityValue = Math.pow(state.models.bestDeployedModelScore, 2) * 1000; // square of the best deployed model score; the removed code used the best score across all trained models
  return Math.max(100_000, revenueMultiple * 10 + subscriberValue + capabilityValue);
 }
@@ -1,4 +1,4 @@
-import type { GameState, MarketState } from '@ai-tycoon/shared';
+import type { GameState, MarketState, BenchmarkResult } from '@ai-tycoon/shared';
 import {
  CONSUMER_BASE_GROWTH,
  CONSUMER_QUALITY_GROWTH_MULTIPLIER,
@@ -13,6 +13,7 @@ import {
  MARKET_CAP_REPUTATION_BONUS,
  OVERLOAD_PENALTY_EXPONENT,
 } from '@ai-tycoon/shared';
+import { BENCHMARKS } from '../data/benchmarks';

 export interface MarketTickResult {
  marketState: MarketState;
@@ -21,12 +22,39 @@ export interface MarketTickResult {
  totalTokenDemand: number;
 }

-export function processMarket(state: GameState, currentTickCapacity: number): MarketTickResult {
-  const bestModel = state.models.trainedModels
-    .filter(m => m.isDeployed)
-    .sort((a, b) => b.benchmarkScore - a.benchmarkScore)[0];
+/**
+ * Computes a quality value for one market segment from benchmark results.
+ *
+ * For every entry in BENCHMARKS that has at least one recorded result, the best
+ * recorded score (divided by 100) is averaged, weighted by that benchmark's
+ * `marketRelevance[segment]`.
+ *
+ * @param segment - Market segment whose relevance weights are applied.
+ * @param benchmarkResults - Eval results to aggregate; may be empty.
+ * @param fallbackScore - Score returned (divided by 100) when there are no results
+ *   or when none of the recorded benchmarks carries weight for the segment.
+ * @returns The weighted average of best scores / 100, or `fallbackScore / 100`.
+ */
+function getSegmentQuality(
+  segment: 'consumer' | 'enterprise' | 'developer' | 'research',
+  benchmarkResults: BenchmarkResult[],
+  fallbackScore: number,
+): number {
+  if (benchmarkResults.length === 0) return fallbackScore / 100;

-  const modelQuality = bestModel ? bestModel.benchmarkScore / 100 : 0;
+  // Best score per benchmark. The previous best is compared against `undefined`
+  // instead of being defaulted to 0, so a genuine score of 0 is recorded and
+  // counted in the average rather than silently dropped.
+  const bestByBenchmark = new Map<string, number>();
+  for (const r of benchmarkResults) {
+    // NaN and negative scores were never recorded before; keep ignoring them.
+    if (!(r.score >= 0)) continue;
+    const prev = bestByBenchmark.get(r.benchmarkId);
+    if (prev === undefined || r.score > prev) bestByBenchmark.set(r.benchmarkId, r.score);
+  }
+
+  let weightedSum = 0;
+  let totalWeight = 0;
+  for (const bench of BENCHMARKS) {
+    const score = bestByBenchmark.get(bench.id);
+    if (score == null) continue; // benchmark never run: contributes no weight
+    const weight = bench.marketRelevance[segment];
+    weightedSum += (score / 100) * weight;
+    totalWeight += weight;
+  }
+
+  if (totalWeight === 0) return fallbackScore / 100;
+  return weightedSum / totalWeight;
+}
+
+export function processMarket(state: GameState, currentTickCapacity: number): MarketTickResult {
+  const consumerQuality = getSegmentQuality('consumer', state.models.benchmarkResults, state.models.bestDeployedModelScore);
+  const enterpriseQuality = getSegmentQuality('enterprise', state.models.benchmarkResults, state.models.bestDeployedModelScore);
+  const modelQuality = state.models.benchmarkResults.length > 0
+    ? (consumerQuality + enterpriseQuality) / 2
+    : state.models.bestDeployedModelScore / 100;
  const chatProduct = state.models.productLines.find(p => p.type === 'chat-product');
  const textApi = state.models.productLines.find(p => p.type === 'text-api');

@@ -34,7 +62,7 @@ export function processMarket(state: GameState, currentTickCapacity: number): Ma
  const consumers = { ...state.market.consumers };
  let subscriptionRevenue = 0;

-  if (chatProduct?.isActive && bestModel) {
+  if (chatProduct?.isActive && modelQuality > 0) {
    const price = chatProduct.pricing.subscriptionPrice;
    const fairPrice = 20 + modelQuality * 80;
    const priceRatio = price / Math.max(1, fairPrice);
@@ -109,7 +137,7 @@ export function processMarket(state: GameState, currentTickCapacity: number): Ma
  let apiRevenue = 0;
  let organicApiTokens = 0;

-  if (textApi?.isActive && bestModel) {
+  if (textApi?.isActive && modelQuality > 0) {
    const reputationFactor = state.reputation.score / 100;
    const qualityFactor = modelQuality;
    const priceFactor = Math.max(0.1, 1 - (textApi.pricing.outputTokenPrice / 20));
@@ -1,21 +1,40 @@
-import type { GameState, ModelsState, TrainedModel, ModelCapabilities } from '@ai-tycoon/shared';
-import { uuid, VRAM_REQUIREMENTS_BY_GENERATION } from '@ai-tycoon/shared';
+import type {
+  GameState, ModelsState, BaseModel, ModelCapabilities, SafetyProfile,
+  TrainingPipeline, TrainingEvent, TrainingEventType,
+  ModelVariant, VariantCreationJob, EvalJob, BenchmarkResult,
+  BenchmarkDefinition,
+} from '@ai-tycoon/shared';
+import { BENCHMARKS } from '../data/benchmarks';
+import {
+  uuid, VRAM_REQUIREMENTS_BY_GENERATION,
+  SFT_TIME_FRACTION, SFT_COMPUTE_FRACTION,
+  ALIGNMENT_TIME_FRACTION, ALIGNMENT_COMPUTE_FRACTION,
+  MOE_CAPABILITY_MULTIPLIER, MOE_SPEED_MULTIPLIER,
+  EVENT_BASE_PROBABILITY,
+  LOSS_SPIKE_DELAY_MIN, LOSS_SPIKE_DELAY_MAX,
+  INSTABILITY_PROGRESS_LOSS_MIN, INSTABILITY_PROGRESS_LOSS_MAX,
+  BREAKTHROUGH_CAPABILITY_BONUS_MIN, BREAKTHROUGH_CAPABILITY_BONUS_MAX,
+  EMERGENT_CAPABILITY_THRESHOLDS,
+  ALIGNMENT_METHODS,
+  SFT_SPECIALIZATION_BONUSES,
+  QUANTIZATION_CONFIGS,
+  DISTILLATION_BASE_RETENTION,
+  QUANTIZATION_TICKS,
+} from '@ai-tycoon/shared';

 export interface ModelTickResult {
  modelsState: ModelsState;
-  modelCompleted: TrainedModel | null;
+  completedModels: BaseModel[]; // base models whose final training stage finished during this tick
+  notifications: { title: string; message: string; type: 'success' | 'warning' | 'info' }[]; // messages raised while processing this tick
 }

 export function processModels(state: GameState): ModelTickResult {
-  const active = state.models.activeTraining;
-  if (!active) {
-    return { modelsState: state.models, modelCompleted: null };
-  }
+  const completedModels: BaseModel[] = [];
+  const notifications: ModelTickResult['notifications'] = [];
+  let baseModels = [...state.models.baseModels];
+  let families = [...state.models.families];

-  const requiredVram = VRAM_REQUIREMENTS_BY_GENERATION[active.generation] ?? 0;
-  if (requiredVram > 0 && state.compute.totalVramGB < requiredVram) {
-    return { modelsState: state.models, modelCompleted: null };
-  }
+  const totalTrainingFlops = state.compute.totalTrainingFlops * state.compute.trainingAllocation;

  const researcherBoost = state.talent.departments.research.headcount *
    state.talent.departments.research.effectiveness;
@@ -23,82 +42,487 @@ export function processModels(state: GameState): ModelTickResult {
    state.talent.departments.engineering.effectiveness;
  const speedMultiplier = 1 + (researcherBoost + engineerBoost) * 0.05;

-  const newProgress = active.progressTicks + speedMultiplier;
+  const updatedPipelines: TrainingPipeline[] = [];

-  if (newProgress >= active.totalTicks) {
-    const model = createTrainedModel(active.modelName, active.generation, active.allocatedCompute, active.allocatedDataTokens, state);
+  for (const pipeline of state.models.activeTrainingPipelines) {
+    if (pipeline.status !== 'active') {
+      updatedPipelines.push(pipeline);
+      continue;
+    }

-    return {
-      modelsState: {
-        ...state.models,
-        trainedModels: [...state.models.trainedModels, model],
-        activeTraining: null,
-      },
-      modelCompleted: model,
-    };
+    const generation = families.find(f => f.id === pipeline.familyId)?.generation ?? 1;
+    const requiredVram = VRAM_REQUIREMENTS_BY_GENERATION[generation] ?? 0;
+    if (requiredVram > 0 && state.compute.totalVramGB < requiredVram) {
+      updatedPipelines.push({ ...pipeline, status: 'stalled' });
+      continue;
+    }
+
+    const effectiveFlops = totalTrainingFlops * pipeline.allocatedComputeFraction;
+    let updated = { ...pipeline, events: [...pipeline.events] };
+
+    if (pipeline.currentStage === 'pretraining') {
+      const stage = { ...pipeline.stages.pretraining };
+      const newProgress = stage.progressTicks + speedMultiplier;
+
+      const events = generateTrainingEvents(pipeline, state);
+      let tickDelay = 0;
+      let progressLost = 0;
+      for (const event of events) {
+        updated.events.push(event);
+        if (event.type === 'loss_spike') {
+          tickDelay += event.impact.ticksDelayed ?? 0;
+          notifications.push({ title: 'Loss Spike', message: `${pipeline.modelName}: Training loss spiked! Delayed ${event.impact.ticksDelayed} ticks.`, type: 'warning' });
+        } else if (event.type === 'instability') {
+          progressLost += event.impact.progressLost ?? 0;
+          notifications.push({ title: 'Training Instability', message: `${pipeline.modelName}: Rolled back to checkpoint. Lost ${Math.round((event.impact.progressLost ?? 0) * 100)}% progress.`, type: 'warning' });
+        } else if (event.type === 'breakthrough') {
+          notifications.push({ title: 'Breakthrough!', message: `${pipeline.modelName}: Unexpected capability jump in ${event.impact.capabilityDomain}!`, type: 'success' });
+        } else if (event.type === 'hardware_failure') {
+          tickDelay += event.impact.ticksDelayed ?? 0;
+          notifications.push({ title: 'Hardware Failure', message: `${pipeline.modelName}: GPU failure during training. Recovering from checkpoint.`, type: 'warning' });
+        } else if (event.type === 'data_contamination') {
+          notifications.push({ title: 'Data Contamination', message: `${pipeline.modelName}: Copyright concerns detected in training data.`, type: 'warning' });
+        }
+      }
+
+      const effectiveProgress = Math.max(0, newProgress - tickDelay - (stage.totalTicks * progressLost));
+      stage.progressTicks = effectiveProgress;
+      stage.computeAllocated = effectiveFlops;
+      stage.lossValue = Math.max(0.01, 10 * Math.exp(-stage.progressTicks / stage.totalTicks * 3));
+
+      if (stage.progressTicks >= stage.totalTicks) {
+        stage.isComplete = true;
+        stage.progressTicks = stage.totalTicks;
+
+        if (updated.stages.sft) {
+          updated.currentStage = 'sft';
+          notifications.push({ title: 'Pre-training Complete', message: `${pipeline.modelName}: Moving to supervised fine-tuning.`, type: 'info' });
+        } else if (updated.stages.alignment) {
+          updated.currentStage = 'alignment';
+          notifications.push({ title: 'Pre-training Complete', message: `${pipeline.modelName}: Moving to alignment.`, type: 'info' });
+        } else {
+          const model = createBaseModel(updated, state);
+          baseModels = [...baseModels, model];
+          families = families.map(f =>
+            f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f,
+          );
+          completedModels.push(model);
+          updated.status = 'completed';
+        }
+      }
+      updated = { ...updated, stages: { ...updated.stages, pretraining: stage } };
+    } else if (pipeline.currentStage === 'sft' && pipeline.stages.sft) {
+      const stage = { ...pipeline.stages.sft };
+      stage.progressTicks += speedMultiplier;
+
+      if (stage.progressTicks >= stage.totalTicks) {
+        stage.isComplete = true;
+        stage.progressTicks = stage.totalTicks;
+
+        if (updated.stages.alignment) {
+          updated.currentStage = 'alignment';
+          notifications.push({ title: 'SFT Complete', message: `${pipeline.modelName}: Moving to alignment.`, type: 'info' });
+        } else {
+          const model = createBaseModel(updated, state);
+          baseModels = [...baseModels, model];
+          families = families.map(f =>
+            f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f,
+          );
+          completedModels.push(model);
+          updated.status = 'completed';
+        }
+      }
+      updated = { ...updated, stages: { ...updated.stages, sft: stage } };
+    } else if (pipeline.currentStage === 'alignment' && pipeline.stages.alignment) {
+      const stage = { ...pipeline.stages.alignment };
+      stage.progressTicks += speedMultiplier;
+
+      if (stage.progressTicks >= stage.totalTicks) {
+        stage.isComplete = true;
+        stage.progressTicks = stage.totalTicks;
+
+        const model = createBaseModel(updated, state);
+        baseModels = [...baseModels, model];
+        families = families.map(f =>
+          f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f,
+        );
+        completedModels.push(model);
+        updated.status = 'completed';
+      }
+      updated = { ...updated, stages: { ...updated.stages, alignment: stage } };
+    }
+
+    updatedPipelines.push(updated);
  }

+  const updatedVariantJobs = processVariantJobs(state, speedMultiplier);
+  for (const variant of updatedVariantJobs.newVariants) {
+    variant.createdAtTick = state.meta.tickCount;
+    families = families.map(f =>
+      f.id === variant.familyId ? { ...f, variants: [...f.variants, variant] } : f,
+    );
+    notifications.push({
+      title: 'Variant Created',
+      message: `${variant.name} (${variant.variantType}) is ready!`,
+      type: 'success',
+    });
+  }
+
+  const updatedEvalJobs = processEvalJobs(state);
+
+  const allDeployed = [
+    ...baseModels.filter(m => m.isDeployed),
+    ...families.flatMap(f => f.variants.filter(v => v.isDeployed)),
+  ];
+
+  const bestDeployedModelScore = allDeployed.reduce((best, m) =>
+    Math.max(best, 'rawCapability' in m ? m.rawCapability : computeVariantScore(m)), 0);
+
+  const bestDeployedSafetyScore = allDeployed.reduce((best, m) =>
+    Math.max(best, m.safetyProfile.overallSafety), 0);
+
  return {
    modelsState: {
      ...state.models,
-      activeTraining: { ...active, progressTicks: newProgress },
+      baseModels,
+      families,
+      activeTrainingPipelines: updatedPipelines,
+      variantJobs: updatedVariantJobs.jobs,
+      evalJobs: updatedEvalJobs.jobs,
+      benchmarkResults: [...state.models.benchmarkResults, ...updatedEvalJobs.newResults],
+      bestDeployedModelScore,
+      bestDeployedSafetyScore,
    },
-    modelCompleted: null,
+    completedModels,
+    notifications,
  };
 }

-function createTrainedModel(
-  name: string,
-  generation: number,
-  compute: number,
-  dataTokens: number,
+/**
+ * Rolls the random training events for one pre-training tick of a pipeline.
+ *
+ * The per-tick base probability is EVENT_BASE_PROBABILITY scaled by log10 of the
+ * architecture's total parameter count; completed research ('interpretability',
+ * 'data-pipeline', 'redundancy-protocols') halves the odds of the matching event.
+ *
+ * @param pipeline - Pipeline being trained; its already-recorded `events` are
+ *   consulted so an emergent capability is reported at most once.
+ * @param state - Current game state (research, datasets, tick count).
+ * @returns Events generated this tick; may be empty.
+ */
+function generateTrainingEvents(pipeline: TrainingPipeline, state: GameState): TrainingEvent[] {
+  const events: TrainingEvent[] = [];
+  const params = pipeline.architecture.totalParameters;
+  const baseProbability = EVENT_BASE_PROBABILITY * Math.log10(Math.max(1, params));
+
+  const hasInterpretability = state.research.completedResearch.includes('interpretability');
+  const hasDataPipeline = state.research.completedResearch.includes('data-pipeline');
+  const hasRedundancy = state.research.completedResearch.includes('redundancy-protocols');
+
+  if (Math.random() < baseProbability * 2.0) {
+    const delay = LOSS_SPIKE_DELAY_MIN + Math.floor(Math.random() * (LOSS_SPIKE_DELAY_MAX - LOSS_SPIKE_DELAY_MIN));
+    events.push({
+      id: uuid(), type: 'loss_spike', tick: state.meta.tickCount,
+      severity: delay > 15 ? 'major' : delay > 10 ? 'moderate' : 'minor',
+      description: `Training loss spiked to ${(Math.random() * 5 + 2).toFixed(2)}`,
+      resolved: true,
+      impact: { ticksDelayed: delay },
+    });
+  }
+
+  if (params > 10 && Math.random() < baseProbability * (hasInterpretability ? 0.25 : 0.5)) {
+    const loss = INSTABILITY_PROGRESS_LOSS_MIN + Math.random() * (INSTABILITY_PROGRESS_LOSS_MAX - INSTABILITY_PROGRESS_LOSS_MIN);
+    events.push({
+      id: uuid(), type: 'instability', tick: state.meta.tickCount,
+      severity: loss > 0.12 ? 'major' : 'moderate',
+      description: 'Training run became unstable. Rolling back to last checkpoint.',
+      resolved: true,
+      impact: { progressLost: loss },
+    });
+  }
+
+  const chinchillaRatio = pipeline.stages.pretraining.chinchillaRatio;
+  if (params > 30 && chinchillaRatio > 15 && Math.random() < baseProbability * 0.3) {
+    const capDomains: (keyof ModelCapabilities)[] = ['reasoning', 'coding', 'creative', 'math', 'knowledge', 'agents'];
+    const domain = capDomains[Math.floor(Math.random() * capDomains.length)];
+    const bonus = BREAKTHROUGH_CAPABILITY_BONUS_MIN + Math.floor(Math.random() * (BREAKTHROUGH_CAPABILITY_BONUS_MAX - BREAKTHROUGH_CAPABILITY_BONUS_MIN));
+    events.push({
+      id: uuid(), type: 'breakthrough', tick: state.meta.tickCount,
+      severity: 'major',
+      description: `Unexpected capability jump in ${domain}!`,
+      resolved: true,
+      impact: { capabilityBonus: bonus, capabilityDomain: domain },
+    });
+  }
+
+  // Emergent capabilities fire while pre-training progress is inside the window
+  // below. That window spans many ticks and can be re-entered after a rollback,
+  // so each capability is reported at most once per pipeline; otherwise the same
+  // +10 bonus would be recorded again on every tick spent inside the window.
+  const pretraining = pipeline.stages.pretraining;
+  const progressRatio = pretraining.progressTicks / pretraining.totalTicks;
+  if (progressRatio > 0.5 && progressRatio < 0.55) {
+    for (const [thresholdStr, capName] of Object.entries(EMERGENT_CAPABILITY_THRESHOLDS)) {
+      const threshold = Number(thresholdStr);
+      const description = `Model developed ${capName} capability!`;
+      const alreadyEmerged = pipeline.events.some(
+        e => e.type === 'emergent_capability' && e.description === description,
+      );
+      if (params >= threshold && !alreadyEmerged) {
+        events.push({
+          id: uuid(), type: 'emergent_capability', tick: state.meta.tickCount,
+          severity: 'major',
+          description,
+          resolved: true,
+          impact: { capabilityBonus: 10, capabilityDomain: 'reasoning' },
+        });
+      }
+    }
+  }
+
+  // Contamination odds scale with the average legal risk of the owned datasets.
+  const avgLegalRisk = state.data.ownedDatasets.length > 0
+    ? state.data.ownedDatasets.reduce((sum, d) => sum + d.legalRisk, 0) / state.data.ownedDatasets.length
+    : 0;
+  if (Math.random() < baseProbability * (hasDataPipeline ? 0.25 : 0.5) * avgLegalRisk) {
+    events.push({
+      id: uuid(), type: 'data_contamination', tick: state.meta.tickCount,
+      severity: 'moderate',
+      description: 'Copyright holders identified content in training data.',
+      resolved: true,
+      impact: {},
+    });
+  }
+
+  if (Math.random() < baseProbability * (hasRedundancy ? 0.1 : 0.2)) {
+    const delay = 10 + Math.floor(Math.random() * 20);
+    events.push({
+      id: uuid(), type: 'hardware_failure', tick: state.meta.tickCount,
+      severity: delay > 20 ? 'major' : 'moderate',
+      description: 'GPU cluster failure during training. Recovering from checkpoint.',
+      resolved: true,
+      impact: { ticksDelayed: delay },
+    });
+  }
+
+  return events;
+}
+
+function createBaseModel( // Builds the finished BaseModel for a pipeline from its architecture, data mix, recorded events and completed stages.
+  pipeline: TrainingPipeline,
  state: GameState,
-): TrainedModel {
+): BaseModel {
+  const { architecture, dataMix } = pipeline;
+  const compute = pipeline.stages.pretraining.computeAllocated; // also reported as trainingCostTotal on the returned model
+  const dataTokens = pipeline.stages.pretraining.targetTokens;
+
  const computeFactor = Math.sqrt(compute) * 5;
  const dataFactor = Math.log10(1 + dataTokens / 1e8) * 10;
  const researchBonus = state.research.completedResearch.length * 3;
  const efficiencyBonus = state.research.completedResearch.filter(r => r.includes('efficiency')).length * 5;

-  const baseCapability = Math.min(95, computeFactor + dataFactor + researchBonus + efficiencyBonus);
+  let rawCapability = Math.min(95, computeFactor + dataFactor + researchBonus + efficiencyBonus); // capped at 95 before any MoE multiplier
+
+  if (architecture.type === 'moe') {
+    rawCapability = Math.min(98, rawCapability * MOE_CAPABILITY_MULTIPLIER); // MoE raises the cap to 98
+  }

  const researcherQuality = state.talent.departments.research.effectiveness;
+
  const capabilities: ModelCapabilities = {
-    reasoning: clamp(baseCapability * (0.8 + Math.random() * 0.4) * (1 + researcherQuality * 0.2)),
-    coding: clamp(baseCapability * (0.7 + Math.random() * 0.5)),
-    creative: clamp(baseCapability * (0.6 + Math.random() * 0.6)),
-    multimodal: clamp(baseCapability * (0.3 + Math.random() * 0.3)),
-    agents: clamp(baseCapability * (0.2 + Math.random() * 0.3)),
-    speed: Math.max(1, 100 - compute * 0.5 + efficiencyBonus * 2),
+    reasoning: clamp(rawCapability * (0.6 + dataMix.scientific * 0.5 + dataMix.code * 0.3) * (1 + researcherQuality * 0.2)),
+    coding: clamp(rawCapability * (0.5 + dataMix.code * 1.0)),
+    creative: clamp(rawCapability * (0.4 + dataMix.books * 0.6 + dataMix.conversation * 0.3)),
+    math: clamp(rawCapability * (0.3 + dataMix.scientific * 0.7 + dataMix.code * 0.2)),
+    knowledge: clamp(rawCapability * (0.5 + dataMix.web * 0.3 + dataMix.books * 0.3)),
+    multimodal: clamp(rawCapability * (dataMix.images * 0.5 + dataMix.video * 0.4 + dataMix.audio * 0.2)), // no constant term: zero when the mix has no image/video/audio data
+    agents: clamp(rawCapability * (0.2 + dataMix.code * 0.3 + dataMix.conversation * 0.2)),
+    speed: Math.max(1, 100 - architecture.totalParameters * 0.3 + efficiencyBonus * 2 + (architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER * 10 : 0)),
+    contextUtilization: Math.min(100, architecture.contextWindow * 0.4),
  };

+  const breakthroughBonuses: Partial<Record<keyof ModelCapabilities, number>> = {}; // bonuses from recorded breakthrough / emergent_capability events, summed per capability
+  for (const event of pipeline.events) {
+    if ((event.type === 'breakthrough' || event.type === 'emergent_capability') && event.impact.capabilityDomain && event.impact.capabilityBonus) {
+      const domain = event.impact.capabilityDomain;
+      breakthroughBonuses[domain] = (breakthroughBonuses[domain] ?? 0) + event.impact.capabilityBonus;
+    }
+  }
+  for (const [domain, bonus] of Object.entries(breakthroughBonuses)) {
+    const key = domain as keyof ModelCapabilities;
+    capabilities[key] = clamp(capabilities[key] + bonus);
+  }
+
+  const completedStages: ('pretraining' | 'sft' | 'alignment')[] = ['pretraining'];
+
+  if (pipeline.stages.sft?.isComplete) { // SFT bonuses apply only when the sft stage on the pipeline passed in is flagged complete
+    completedStages.push('sft');
+    const sft = pipeline.stages.sft;
+    for (let i = 0; i < sft.specializations.length; i++) {
+      const spec = sft.specializations[i];
+      const bonuses = SFT_SPECIALIZATION_BONUSES[spec];
+      if (!bonuses) continue;
+      const diminishing = i === 0 ? 1.0 : i === 1 ? 0.7 : 0.4; // first specialization counts fully, second at 70%, any later one at 40%
+      for (const [cap, value] of Object.entries(bonuses)) {
+        const key = cap as keyof ModelCapabilities;
+        capabilities[key] = clamp(capabilities[key] + value * diminishing);
+      }
+    }
+  }
+
  const safetyResearch = state.research.completedResearch.filter(
    r => r.includes('alignment') || r.includes('interpretability') || r.includes('constitutional'),
  ).length;
-  const safetyScore = Math.min(100, 30 + safetyResearch * 15 + Math.random() * 10);
+  let overallSafety = Math.min(100, 30 + safetyResearch * 15 + Math.random() * 10);
+  let refusalRate = overallSafety > 60 ? 0.1 : 0.03; // default used when no alignment stage was completed

-  const safetyPenalty = safetyScore > 60 ? (safetyScore - 60) * 0.1 : 0;
-  const benchmarkScore = Math.max(0,
-    (capabilities.reasoning * 0.3 + capabilities.coding * 0.25 +
-    capabilities.creative * 0.2 + capabilities.multimodal * 0.15 + capabilities.agents * 0.1) - safetyPenalty,
-  );
+  if (pipeline.stages.alignment?.isComplete) { // alignment effects apply only when the alignment stage is flagged complete
+    completedStages.push('alignment');
+    const alignment = pipeline.stages.alignment;
+    const methodConfig = ALIGNMENT_METHODS[alignment.method];
+    if (methodConfig) {
+      const safetyGain = methodConfig.safetyGain * alignment.safetyWeight;
+      overallSafety = Math.min(100, overallSafety + safetyGain);
+      refusalRate = methodConfig.baseRefusal * Math.pow(alignment.safetyWeight, 1.5);
+      const capLoss = methodConfig.capabilityLoss * alignment.safetyWeight * 0.5;
+      for (const key of Object.keys(capabilities) as (keyof ModelCapabilities)[]) {
+        if (key !== 'speed' && key !== 'contextUtilization') { // speed and contextUtilization are exempt from the alignment capability loss
+          capabilities[key] = clamp(capabilities[key] - capLoss);
+        }
+      }
+    }
+  }

-  const parameterCount = Math.pow(10, generation) * (0.5 + Math.random());
+  const safetyProfile: SafetyProfile = {
+    overallSafety,
+    refusalRate,
+    harmAvoidance: overallSafety,
+    instructionFollowing: capabilities.reasoning * 0.8,
+    honesty: overallSafety * 0.9,
+  };

  return {
    id: uuid(),
-    name,
-    generation,
-    parameterCount,
-    trainingDataSize: dataTokens,
+    familyId: pipeline.familyId,
+    name: pipeline.modelName,
+    architecture,
+    dataMix,
    capabilities,
-    safetyScore,
-    benchmarkScore,
-    tuning: { preset: 'helpful-safe' },
+    safetyProfile,
+    rawCapability,
    isDeployed: false,
    trainedAtTick: state.meta.tickCount,
+    trainingCostTotal: compute,
+    trainingStagesCompleted: completedStages,
  };
 }

+/**
+ * Advances every active variant-creation job by `speedMultiplier` ticks.
+ *
+ * A job whose progress reaches its `totalTicks` is marked completed; when its
+ * base model can be found in `state.models.baseModels`, the resulting variant
+ * is built and collected. Jobs that are not active are passed through untouched.
+ *
+ * @param state - Current game state (source of jobs and base models).
+ * @param speedMultiplier - Progress added to each active job this tick.
+ * @returns The updated job list and the variants finished this tick.
+ */
+function processVariantJobs(
+  state: GameState,
+  speedMultiplier: number,
+): { jobs: VariantCreationJob[]; newVariants: ModelVariant[] } {
+  const newVariants: ModelVariant[] = [];
+  const jobs: VariantCreationJob[] = [];
+
+  for (const job of state.models.variantJobs) {
+    if (job.status !== 'active') {
+      jobs.push(job);
+      continue;
+    }
+
+    const advanced = job.progressTicks + speedMultiplier;
+    if (advanced >= job.totalTicks) {
+      const source = state.models.baseModels.find(m => m.id === job.baseModelId);
+      if (source) {
+        newVariants.push(createVariant(job, source));
+      }
+      // The job completes even when its base model is missing.
+      jobs.push({ ...job, status: 'completed' as const, progressTicks: job.totalTicks });
+      continue;
+    }
+
+    jobs.push({ ...job, progressTicks: advanced });
+  }
+
+  return { jobs, newVariants };
+}
+
+/**
+ * Builds the ModelVariant produced by a finished variant-creation job.
+ *
+ * Starts from a copy of the base model's capabilities and architecture, then
+ * applies the adjustments for the job type:
+ * - distillation: scales every capability by a retention factor and shrinks
+ *   the architecture to the target parameter count;
+ * - fine-tuning: adds the specialization's per-capability bonuses;
+ * - quantization: scales every capability except `speed` by the level's
+ *   `qualityRetention` and scales `speed` by its `speedMultiplier`.
+ * A job whose `config` shape does not match its `jobType` yields an
+ * unmodified copy named after the base model.
+ *
+ * @param job - The job describing which kind of variant to build.
+ * @param base - The base model the variant derives from.
+ * @param createdAtTick - Tick stored on the variant. Defaults to 0 so that
+ *   existing two-argument calls keep their previous result.
+ * @returns A new, undeployed variant with a fresh id.
+ */
+function createVariant(job: VariantCreationJob, base: BaseModel, createdAtTick = 0): ModelVariant {
+  const caps = { ...base.capabilities };
+  let costMultiplier = 1.0;
+  let speedMultiplier = 1.0;
+  let variantName = base.name;
+  let arch = { ...base.architecture };
+  let quantization: ModelVariant['quantization'];
+  let distillationRetention: ModelVariant['distillationRetention'];
+  let finetuneSpecialization: ModelVariant['finetuneSpecialization'];
+
+  if (job.jobType === 'distillation' && 'targetParameters' in job.config) {
+    const config = job.config;
+    const rawRatio = config.targetParameters / base.architecture.totalParameters;
+    // A zero, negative or NaN parameter count would turn every capability into
+    // NaN (or the speed multiplier into Infinity); treat it as a same-size target.
+    const sizeRatio = Number.isFinite(rawRatio) && rawRatio > 0 ? rawRatio : 1;
+    // Distillation can at best preserve the source model: never scale capabilities up.
+    const retention = Math.min(1, DISTILLATION_BASE_RETENTION + sizeRatio * 0.25);
+    for (const key of Object.keys(caps) as (keyof ModelCapabilities)[]) {
+      caps[key] = clamp(caps[key] * retention);
+    }
+    costMultiplier = sizeRatio * 0.8;
+    speedMultiplier = (1 / sizeRatio) * 0.7;
+    arch = { ...arch, totalParameters: config.targetParameters, activeParameters: config.targetParameters };
+    variantName = config.variantName;
+    // Store the factor that was actually applied instead of recomputing it.
+    distillationRetention = retention;
+  } else if (job.jobType === 'fine-tuning' && 'specialization' in job.config) {
+    const config = job.config;
+    const bonuses = SFT_SPECIALIZATION_BONUSES[config.specialization];
+    if (bonuses) {
+      for (const [cap, value] of Object.entries(bonuses)) {
+        caps[cap as keyof ModelCapabilities] = clamp(caps[cap as keyof ModelCapabilities] + value);
+      }
+    }
+    variantName = config.variantName;
+    finetuneSpecialization = config.specialization;
+  } else if (job.jobType === 'quantization' && 'level' in job.config) {
+    const config = job.config;
+    const qConfig = QUANTIZATION_CONFIGS[config.level];
+    if (qConfig) {
+      for (const key of Object.keys(caps) as (keyof ModelCapabilities)[]) {
+        if (key !== 'speed') caps[key] = clamp(caps[key] * qConfig.qualityRetention);
+      }
+      caps.speed = clamp(caps.speed * qConfig.speedMultiplier);
+      costMultiplier = qConfig.costMultiplier;
+      speedMultiplier = qConfig.speedMultiplier;
+    }
+    variantName = config.variantName;
+    // The level is recorded even when no config entry exists for it.
+    quantization = config.level;
+  }
+
+  const variantType: ModelVariant['variantType'] =
+    job.jobType === 'distillation' ? 'distilled' : job.jobType === 'fine-tuning' ? 'fine-tuned' : 'quantized';
+
+  return {
+    id: uuid(),
+    familyId: base.familyId,
+    baseModelId: base.id,
+    name: variantName,
+    variantType,
+    architecture: arch,
+    capabilities: caps,
+    safetyProfile: { ...base.safetyProfile },
+    isDeployed: false,
+    createdAtTick,
+    quantization,
+    distillationRetention,
+    finetuneSpecialization,
+    costMultiplier,
+    speedMultiplier,
+  };
+}
+
+/**
+ * Advances each active evaluation job by one tick and scores the jobs that finish.
+ *
+ * A finishing job looks up its model among the base models and every family's
+ * variants. When the model is found, its benchmark scores are attached to the
+ * job and also collected in `newResults`. A finishing job whose model cannot
+ * be found is still marked completed, without new results. Jobs that are not
+ * `active` pass through untouched.
+ *
+ * @param state - Current game state; read only.
+ * @returns The updated job list and the benchmark results produced this tick.
+ */
+function processEvalJobs(state: GameState): { jobs: EvalJob[]; newResults: BenchmarkResult[] } {
+  const newResults: BenchmarkResult[] = [];
+  const candidates: (BaseModel | ModelVariant)[] = [
+    ...state.models.baseModels,
+    ...state.models.families.flatMap(family => family.variants),
+  ];
+
+  const jobs = state.models.evalJobs.map(job => {
+    if (job.status !== 'active') return job;
+
+    const advanced = job.progressTicks + 1;
+    if (!(advanced >= job.totalTicks)) {
+      return { ...job, progressTicks: advanced };
+    }
+
+    const finished = { ...job, status: 'completed' as const, progressTicks: job.totalTicks };
+    const target = candidates.find(candidate => candidate.id === job.modelId);
+    if (!target) return finished;
+
+    const results = computeBenchmarkScores(target, job.benchmarkIds, state.meta.tickCount);
+    newResults.push(...results);
+    return { ...finished, results };
+  });
+  return { jobs, newResults };
+}
+
+/**
+ * Scores one model on each requested benchmark.
+ *
+ * For a known benchmark the score is 70% of the primary capability plus 30%
+ * of the secondary capability (0 when the benchmark has none), plus uniform
+ * random noise in [-3, 3), clamped to 0-100. An id with no matching entry in
+ * BENCHMARKS produces a result with score 0.
+ *
+ * @param model - Base model or variant whose capabilities are scored.
+ * @param benchmarkIds - Benchmark ids to evaluate, in output order.
+ * @param tick - Tick recorded on every result as `ranAtTick`.
+ * @returns One result per requested id.
+ */
+function computeBenchmarkScores(
+  model: BaseModel | ModelVariant,
+  benchmarkIds: string[],
+  tick: number,
+): BenchmarkResult[] {
+  const definitions = new Map(BENCHMARKS.map(def => [def.id, def]));
+  return benchmarkIds.map(benchmarkId => {
+    const def = definitions.get(benchmarkId);
+    let score = 0;
+    if (def) {
+      const primary = model.capabilities[def.primaryCapability] ?? 0;
+      const secondary = def.secondaryCapability ? (model.capabilities[def.secondaryCapability] ?? 0) : 0;
+      const noise = (Math.random() - 0.5) * 6;
+      score = clamp(primary * 0.7 + secondary * 0.3 + noise);
+    }
+    return { benchmarkId, modelId: model.id, score, ranAtTick: tick };
+  });
+}
+
+/**
+ * Collapses a variant's capabilities into a single weighted score.
+ * Weights: reasoning 0.25, coding 0.2, creative / math / knowledge 0.15 each,
+ * agents 0.1. The remaining capabilities are not part of the score.
+ */
+function computeVariantScore(variant: ModelVariant): number {
+  const { reasoning, coding, creative, math, knowledge, agents } = variant.capabilities;
+  return (
+    reasoning * 0.25 +
+    coding * 0.2 +
+    creative * 0.15 +
+    math * 0.15 +
+    knowledge * 0.15 +
+    agents * 0.1
+  );
+}
+
 /** Restricts `n` to the inclusive range 0-100; NaN is passed through as NaN. */
 function clamp(n: number): number {
  const capped = Math.min(100, n);
  return Math.max(0, capped);
 }
@@ -14,13 +14,9 @@ export interface ReputationTickResult {
 export function processReputation(state: GameState): ReputationState & { _safetyIncident?: boolean } {
  let { safetyRecord, publicPerception, employeeSatisfaction, regulatoryStanding } = state.reputation;

-  const bestModel = state.models.trainedModels
-    .filter(m => m.isDeployed)
-    .sort((a, b) => b.benchmarkScore - a.benchmarkScore)[0];
-
  let safetyIncident = false;
-  if (bestModel) {
-    const safetyLevel = bestModel.safetyScore;
+  if (state.models.bestDeployedSafetyScore > 0) {
+    const safetyLevel = state.models.bestDeployedSafetyScore;
    if (safetyLevel < LOW_SAFETY_THRESHOLD && state.meta.tickCount % 60 === 0) {
      const incidentProb = SAFETY_INCIDENT_PROBABILITY_BASE * (LOW_SAFETY_THRESHOLD - safetyLevel);
      if (Math.random() < incidentProb) {
@@ -40,13 +40,14 @@ export function processTick(state: GameState): Partial<GameState> {
  const stateWithInfra = { ...state, infrastructure };
  const modelResult = processModels(stateWithInfra);

-  if (modelResult.modelCompleted) {
+  for (const completed of modelResult.completedModels) {
    notifications.push({
      title: 'Training Complete',
-      message: `${modelResult.modelCompleted.name} is ready! Benchmark: ${modelResult.modelCompleted.benchmarkScore.toFixed(1)}/100`,
+      message: `${completed.name} is ready! Capability: ${completed.rawCapability.toFixed(1)}/100`,
      type: 'success',
    });
  }
+  notifications.push(...modelResult.notifications);

  const stateWithModels = { ...stateWithInfra, models: modelResult.modelsState };

@@ -23,6 +23,66 @@ export const CAPABILITY_FORMULA = {
  efficiencyWeight: 0.1,
 };

+// Stage sizing for the training pipeline. The code that consumes these values is outside this chunk.
+// NOTE(review): the *_FRACTION values presumably size SFT / alignment time and compute relative to pre-training — confirm.
+export const PRETRAINING_BASE_TICKS = 180;
+export const SFT_TIME_FRACTION = 0.10;
+export const SFT_COMPUTE_FRACTION = 0.06;
+export const ALIGNMENT_TIME_FRACTION = 0.08;
+export const ALIGNMENT_COMPUTE_FRACTION = 0.04;
+// NOTE(review): presumably the target ratio of training tokens to parameters — confirm against the scaling code.
+export const CHINCHILLA_OPTIMAL_RATIO = 20;
+
+// NOTE(review): presumably the cap on simultaneously running training pipelines, keyed by era id — confirm.
+export const MAX_CONCURRENT_TRAINING: Record<string, number> = {
+  startup: 1, scaleup: 2, bigtech: 4, agi: 8,
+};
+
+// Variant-job tuning.
+// NOTE(review): the *_FRACTION values presumably size a variant job relative to the base model's training — confirm.
+export const DISTILLATION_COMPUTE_FRACTION = 0.15;
+export const DISTILLATION_TIME_FRACTION = 0.20;
+// Starting point of the distillation retention factor: createVariant adds
+// 0.25 x (target parameters / base parameters) to this value.
+export const DISTILLATION_BASE_RETENTION = 0.70;
+export const FINETUNE_COMPUTE_FRACTION = 0.03;
+export const FINETUNE_TIME_FRACTION = 0.08;
+// NOTE(review): presumably the fixed duration of a quantization job, in ticks — confirm.
+export const QUANTIZATION_TICKS = 8;
+
+// NOTE(review): presumably multipliers applied to 'moe' architectures — the consuming code is not visible here.
+export const MOE_CAPABILITY_MULTIPLIER = 1.15;
+export const MOE_SPEED_MULTIPLIER = 1.3;
+// Selectable sizes. NOTE(review): units are not stated here (billions of parameters / thousands of tokens?) — confirm.
+export const PARAMETER_OPTIONS = [1, 3, 7, 13, 30, 70, 130, 300, 700, 1400];
+export const CONTEXT_WINDOW_OPTIONS = [4, 8, 32, 128, 256, 1024];
+
+// Tuning for random training events; each MIN/MAX pair bounds one effect.
+// NOTE(review): presumably a per-tick probability, delays in ticks, progress loss as a fraction,
+// and bonus in capability points — the event code is not visible here, confirm.
+export const EVENT_BASE_PROBABILITY = 0.001;
+export const LOSS_SPIKE_DELAY_MIN = 5;
+export const LOSS_SPIKE_DELAY_MAX = 20;
+export const INSTABILITY_PROGRESS_LOSS_MIN = 0.05;
+export const INSTABILITY_PROGRESS_LOSS_MAX = 0.15;
+export const BREAKTHROUGH_CAPABILITY_BONUS_MIN = 5;
+export const BREAKTHROUGH_CAPABILITY_BONUS_MAX = 15;
+
+// Maps a numeric threshold to the id of the capability that emerges at it.
+// NOTE(review): what the threshold measures (parameter count?) is not visible here — confirm.
+export const EMERGENT_CAPABILITY_THRESHOLDS: Record<number, string> = {
+  10: 'basic-reasoning',
+  50: 'chain-of-thought',
+  100: 'tool-use',
+  500: 'long-horizon-planning',
+};
+
+// Per-quantization-level effects, as applied by createVariant:
+// - qualityRetention multiplies every capability except `speed`;
+// - speedMultiplier multiplies the `speed` capability and becomes the variant's speedMultiplier;
+// - costMultiplier becomes the variant's costMultiplier.
+export const QUANTIZATION_CONFIGS: Record<string, { qualityRetention: number; speedMultiplier: number; costMultiplier: number }> = {
+  fp16: { qualityRetention: 1.00, speedMultiplier: 1.0, costMultiplier: 1.0 },
+  int8: { qualityRetention: 0.97, speedMultiplier: 1.8, costMultiplier: 0.55 },
+  int4: { qualityRetention: 0.90, speedMultiplier: 3.0, costMultiplier: 0.30 },
+  int2: { qualityRetention: 0.75, speedMultiplier: 5.0, costMultiplier: 0.15 },
+};
+
+// Per-alignment-method tuning: safety gained, capability lost, base refusal rate,
+// and the research id listed as a prerequisite.
+// NOTE(review): presumably the source of the safety gain, capability loss and refusal rate
+// applied when a base model finishes its alignment stage — confirm at the lookup site.
+export const ALIGNMENT_METHODS: Record<string, { safetyGain: number; capabilityLoss: number; baseRefusal: number; requiredResearch: string }> = {
+  rlhf: { safetyGain: 25, capabilityLoss: 5, baseRefusal: 0.10, requiredResearch: 'alignment-research' },
+  dpo: { safetyGain: 20, capabilityLoss: 2, baseRefusal: 0.05, requiredResearch: 'interpretability' },
+  constitutional: { safetyGain: 30, capabilityLoss: 4, baseRefusal: 0.14, requiredResearch: 'constitutional-ai' },
+};
+
+// Capability deltas per specialization. createVariant adds each delta to the
+// matching capability of a fine-tuned variant and clamps the result to 0-100;
+// negative values are trade-offs.
+export const SFT_SPECIALIZATION_BONUSES: Record<string, Record<string, number>> = {
+  general:      { reasoning: 5, coding: 5, creative: 5, math: 5, knowledge: 5, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
+  code:         { reasoning: 0, coding: 15, creative: -3, math: 8, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
+  math:         { reasoning: 8, coding: 0, creative: -3, math: 15, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
+  creative:     { reasoning: 0, coding: -3, creative: 15, math: 0, knowledge: 5, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
+  multilingual: { reasoning: 0, coding: 0, creative: 0, math: 0, knowledge: 10, multimodal: 0, agents: 0, speed: -5, contextUtilization: 0 },
+  'tool-use':   { reasoning: 0, coding: 8, creative: 0, math: 0, knowledge: 0, multimodal: 0, agents: 15, speed: -5, contextUtilization: 0 },
+};
+
 export const CONSUMER_BASE_GROWTH = 0.002;
 export const CONSUMER_QUALITY_GROWTH_MULTIPLIER = 0.01;
 export const CONSUMER_PRICE_ELASTICITY = -0.5;
@@ -58,4 +58,4 @@ export const INITIAL_SETTINGS: GameSettings = {
  sfxVolume: 0.7,
 };

-export const SAVE_VERSION = 5;
+// Version number of the save format.
+// NOTE(review): presumably compared against stored saves to trigger migration — confirm in the save loader.
+export const SAVE_VERSION = 6;
@@ -1,51 +1,239 @@
-export interface ModelsState {
-  trainedModels: TrainedModel[];
-  activeTraining: TrainingJob | null;
-  productLines: ProductLine[];
+import type { Era } from './gameState';
+import type { DataDomain } from './data';
+
+/** Architecture kinds. NOTE(review): 'moe' presumably means mixture-of-experts (see expertCount / expertTopK) — confirm. */
+export type ArchitectureType = 'dense' | 'moe';
+
+/**
+ * Structural description of a model.
+ * createVariant copies it from the base model and, for distilled variants,
+ * overwrites both parameter counts with the distillation target.
+ */
+export interface ModelArchitecture {
+  type: ArchitectureType;
+  // NOTE(review): the unit of the parameter counts is not stated here — confirm.
+  totalParameters: number;
+  activeParameters: number;
+  // Optional expert settings; presumably only meaningful for 'moe' — confirm.
+  expertCount?: number;
+  expertTopK?: number;
+  contextWindow: number;
+  vocabularySize: number;
 }

-export interface TrainedModel {
+/** One numeric weight per data domain; DEFAULT_DATA_MIX uses fractions that add up to 1. */
+export type DataMixAllocation = Record<DataDomain, number>;
+
+/** Stages a training pipeline can be in; also recorded on BaseModel.trainingStagesCompleted. */
+export type TrainingStage = 'pretraining' | 'sft' | 'alignment';
+/** Lifecycle states of a training pipeline. */
+export type TrainingJobStatus = 'active' | 'paused' | 'stalled' | 'completed' | 'failed';
+
+/**
+ * One in-flight training run for a model.
+ * `stages.pretraining` is always present; `sft` and `alignment` are null when absent
+ * (NOTE(review): presumably meaning the stage was not configured — confirm).
+ * The finished BaseModel takes its `familyId` and `name` from `familyId` / `modelName`.
+ */
+export interface TrainingPipeline {
  id: string;
-  name: string;
-  generation: number;
-  parameterCount: number;
-  trainingDataSize: number;
-  capabilities: ModelCapabilities;
-  safetyScore: number;
-  benchmarkScore: number;
-  tuning: ModelTuning;
-  isDeployed: boolean;
-  trainedAtTick: number;
+  familyId: string;
+  modelName: string;
+  architecture: ModelArchitecture;
+  dataMix: DataMixAllocation;
+  currentStage: TrainingStage;
+  stages: {
+    pretraining: PreTrainingConfig;
+    sft: SFTConfig | null;
+    alignment: AlignmentConfig | null;
+  };
+  status: TrainingJobStatus;
+  // NOTE(review): presumably the share of available compute given to this pipeline — confirm.
+  allocatedComputeFraction: number;
+  events: TrainingEvent[];
+  startedAtTick: number;
+}
+
+/** Settings and progress of the pre-training stage of a pipeline. */
+export interface PreTrainingConfig {
+  targetTokens: number;
+  processedTokens: number;
+  computeAllocated: number;
+  progressTicks: number;
+  totalTicks: number;
+  lossValue: number;
+  // NOTE(review): presumably tokens per parameter, compared against CHINCHILLA_OPTIMAL_RATIO — confirm.
+  chinchillaRatio: number;
+  isComplete: boolean;
+}
+
+/** Specializations available for supervised fine-tuning; the same ids key SFT_SPECIALIZATION_BONUSES. */
+export type SFTSpecialization = 'general' | 'code' | 'math' | 'creative' | 'multilingual' | 'tool-use';
+
+/** Settings and progress of the SFT stage of a pipeline. */
+export interface SFTConfig {
+  specializations: SFTSpecialization[];
+  progressTicks: number;
+  totalTicks: number;
+  isComplete: boolean;
+}
+
+/** Alignment methods; the same ids key ALIGNMENT_METHODS. */
+export type AlignmentMethod = 'rlhf' | 'dpo' | 'constitutional';
+
+/** Settings and progress of the alignment stage of a pipeline. */
+export interface AlignmentConfig {
+  method: AlignmentMethod;
+  // Scales the refusal rate (to the power 1.5) and the capability loss when the base model is built.
+  // NOTE(review): expected range is presumably 0-1 — confirm.
+  safetyWeight: number;
+  helpfulnessWeight: number;
+  progressTicks: number;
+  totalTicks: number;
+  isComplete: boolean;
+}
+
+/** Kinds of random events that can be recorded on a training pipeline. */
+export type TrainingEventType =
+  | 'loss_spike'
+  | 'instability'
+  | 'breakthrough'
+  | 'emergent_capability'
+  | 'data_contamination'
+  | 'hardware_failure';
+
+/** A single event recorded on a training pipeline. */
+export interface TrainingEvent {
+  id: string;
+  type: TrainingEventType;
+  tick: number;
+  severity: 'minor' | 'moderate' | 'major';
+  description: string;
+  resolved: boolean;
+  // Every impact field is optional.
+  // NOTE(review): presumably only the fields relevant to the event type are set — confirm.
+  impact: {
+    ticksDelayed?: number;
+    progressLost?: number;
+    capabilityBonus?: number;
+    capabilityDomain?: keyof ModelCapabilities;
+  };
 }

+/**
+ * Per-domain capability scores of a model.
+ * The engine clamps the values it writes to the range 0-100.
+ */
 export interface ModelCapabilities {
  reasoning: number;
  coding: number;
  creative: number;
+  math: number;
+  knowledge: number;
  multimodal: number;
  agents: number;
  speed: number;
+  contextUtilization: number;
 }

-export interface ModelTuning {
-  preset: TuningPreset;
-  verbosity?: number;
-  safetyLevel?: number;
-  creativity?: number;
-  speedQuality?: number;
-  refusalRate?: number;
+/**
+ * Safety-related scores of a model.
+ * When a base model is built, harmAvoidance equals overallSafety, honesty is
+ * 0.9 x overallSafety and instructionFollowing is 0.8 x the reasoning capability.
+ * Variants receive a copy of their base model's profile.
+ */
+export interface SafetyProfile {
+  overallSafety: number;
+  refusalRate: number;
+  harmAvoidance: number;
+  instructionFollowing: number;
+  honesty: number;
 }

-export type TuningPreset = 'helpful-safe' | 'max-capability' | 'enterprise' | 'creative';
+/** A fully trained model, produced when a training pipeline completes. */
+export interface BaseModel {
+  id: string;
+  familyId: string;
+  name: string;
+  architecture: ModelArchitecture;
+  dataMix: DataMixAllocation;
+  capabilities: ModelCapabilities;
+  safetyProfile: SafetyProfile;
+  // Single headline number shown as "Capability: x/100" in the training-complete notification.
+  rawCapability: number;
+  isDeployed: boolean;
+  // Game tick at which the model was created.
+  trainedAtTick: number;
+  trainingCostTotal: number;
+  trainingStagesCompleted: TrainingStage[];
+}

-export interface TrainingJob {
-  modelName: string;
+/** Kind of derived model; createVariant maps the job types distillation / fine-tuning / quantization onto these. */
+export type VariantType = 'distilled' | 'fine-tuned' | 'quantized';
+/** Quantization levels; the same ids key QUANTIZATION_CONFIGS. */
+export type QuantizationLevel = 'fp16' | 'int8' | 'int4' | 'int2';
+
+/** A model derived from a base model by distillation, fine-tuning or quantization. */
+export interface ModelVariant {
+  id: string;
+  familyId: string;
+  baseModelId: string;
+  name: string;
+  variantType: VariantType;
+  architecture: ModelArchitecture;
+  capabilities: ModelCapabilities;
+  // Copied from the base model when the variant is created.
+  safetyProfile: SafetyProfile;
+  isDeployed: boolean;
+  createdAtTick: number;
+  // The next three fields are each set only for the matching variant type.
+  quantization?: QuantizationLevel;
+  distillationRetention?: number;
+  finetuneSpecialization?: SFTSpecialization;
+  // Both default to 1.0 in createVariant; distillation and quantization change them.
+  costMultiplier: number;
+  speedMultiplier: number;
+}
+
+/**
+ * Groups a base model with the variants derived from it.
+ * NOTE(review): `baseModelId` is presumably null until the family's base model finishes training — confirm.
+ */
+export interface ModelFamily {
+  id: string;
+  name: string;
  generation: number;
-  allocatedCompute: number;
-  allocatedDataTokens: number;
+  baseModelId: string | null;
+  variants: ModelVariant[];
+  createdAtTick: number;
+}
+
+/** Kinds of variant-creation job. */
+export type VariantJobType = 'distillation' | 'fine-tuning' | 'quantization';
+
+/**
+ * A queued or finished job that derives a variant from a base model.
+ * `config` is not tied to `jobType` by the type system; createVariant checks
+ * both the job type and the presence of the config's distinguishing field.
+ */
+export interface VariantCreationJob {
+  id: string;
+  familyId: string;
+  baseModelId: string;
+  jobType: VariantJobType;
+  config: DistillationConfig | FineTuneConfig | QuantizationConfig;
  progressTicks: number;
  totalTicks: number;
-  estimatedCapability: number;
+  allocatedComputeFraction: number;
+  status: 'active' | 'completed';
+}
+
+/** Distillation settings; createVariant recognises this config by its `targetParameters` field. */
+export interface DistillationConfig {
+  targetParameters: number;
+  targetArchitecture: ArchitectureType;
+  variantName: string;
+}
+
+/** Fine-tuning settings; createVariant recognises this config by its `specialization` field. */
+export interface FineTuneConfig {
+  specialization: SFTSpecialization;
+  datasetIds: string[];
+  variantName: string;
+}
+
+/** Quantization settings; createVariant recognises this config by its `level` field. */
+export interface QuantizationConfig {
+  level: QuantizationLevel;
+  variantName: string;
+}
+
+/** Categories a benchmark can belong to. */
+export type BenchmarkCategory = 'reasoning' | 'coding' | 'math' | 'knowledge' | 'safety' | 'chat' | 'multimodal' | 'agents';
+
+/** Static description of one benchmark in the benchmark suite. */
+export interface BenchmarkDefinition {
+  id: string;
+  name: string;
+  category: BenchmarkCategory;
+  description: string;
+  // Scoring weights the primary capability at 0.7 and the optional secondary one at 0.3.
+  primaryCapability: keyof ModelCapabilities;
+  secondaryCapability?: keyof ModelCapabilities;
+  computeCost: number;
+  ticksToRun: number;
+  unlockedAtEra: Era;
+  // One relevance weight per market segment.
+  marketRelevance: {
+    consumer: number;
+    enterprise: number;
+    developer: number;
+    research: number;
+  };
+}
+
+/** Score of one model on one benchmark; `score` is clamped to 0-100 when computed. */
+export interface BenchmarkResult {
+  benchmarkId: string;
+  modelId: string;
+  score: number;
+  ranAtTick: number;
+  // Not set by the scoring function. NOTE(review): presumably filled in by leaderboard code — confirm.
+  rank?: number;
+}
+
+/**
+ * A benchmark-evaluation run for one model (base model or variant).
+ * processEvalJobs advances `progressTicks` by one per call and, on completion,
+ * replaces `results` with the computed scores when the model is found.
+ */
+export interface EvalJob {
+  id: string;
+  modelId: string;
+  benchmarkIds: string[];
+  progressTicks: number;
+  totalTicks: number;
+  computeAllocated: number;
+  status: 'active' | 'completed';
+  results: BenchmarkResult[];
+}
+
+/** Kinds of product line a model can be offered through. */
+export type ProductLineType = 'text-api' | 'chat-product' | 'chat-free' | 'chat-enterprise' | 'code-api' | 'image' | 'agents-api';
+
+/** Pricing settings of a product line. NOTE(review): currency and per-token units are not stated here — confirm. */
+export interface ProductPricing {
+  inputTokenPrice: number;
+  outputTokenPrice: number;
+  thinkingTokenBudget: number;
+  cachingEnabled: boolean;
+  subscriptionPrice: number;
+  freeTokenAllowance: number;
 }

 export interface ProductLine {
@@ -57,20 +245,38 @@ export interface ProductLine {
  pricing: ProductPricing;
 }

-export type ProductLineType = 'text-api' | 'chat-product' | 'image' | 'code' | 'agents';
-
-export interface ProductPricing {
-  inputTokenPrice: number;
-  outputTokenPrice: number;
-  thinkingTokenBudget: number;
-  cachingEnabled: boolean;
-  subscriptionPrice: number;
-  freeTokenAllowance: number;
+/** Everything the game tracks about models: families, trained models, running jobs, results and products. */
+export interface ModelsState {
+  families: ModelFamily[];
+  baseModels: BaseModel[];
+  activeTrainingPipelines: TrainingPipeline[];
+  variantJobs: VariantCreationJob[];
+  evalJobs: EvalJob[];
+  benchmarkResults: BenchmarkResult[];
+  productLines: ProductLine[];
+  bestDeployedModelScore: number;
+  // Read by processReputation, which only runs its safety check when this is above 0.
+  bestDeployedSafetyScore: number;
 }

+/** Default data mix: one fraction per data domain, adding up to 1.00. */
+export const DEFAULT_DATA_MIX: DataMixAllocation = {
+  web: 0.35,
+  books: 0.10,
+  code: 0.15,
+  scientific: 0.10,
+  conversation: 0.10,
+  multilingual: 0.05,
+  images: 0.05,
+  video: 0.03,
+  audio: 0.02,
+  synthetic: 0.05,
+};
+
 export const INITIAL_MODELS: ModelsState = {
-  trainedModels: [],
-  activeTraining: null,
+  families: [],
+  baseModels: [],
+  activeTrainingPipelines: [],
+  variantJobs: [],
+  evalJobs: [],
+  benchmarkResults: [],
  productLines: [
    {
      id: 'text-api',
@@ -103,4 +309,6 @@ export const INITIAL_MODELS: ModelsState = {
      },
    },
  ],
+  bestDeployedModelScore: 0,
+  bestDeployedSafetyScore: 0,
 };