Overhaul model system with multi-stage training, variants, benchmarks, and eval
CI / build-and-push (push) Successful in 32s

Replace the single-stage training + flat capability score with a realistic AI
development pipeline: pre-training with Chinchilla scaling laws, SFT with
specializations, alignment with safety/capability tradeoffs (RLHF/DPO/Constitutional),
model families with distillation/fine-tuning/quantization variants, named benchmark
suite with compute-costing eval jobs, and segment-specific market quality.

Phases 1-6 of the model rework plan: new types, engine rewrite, save migration,
training events/risk system, concurrent training, variant creation, benchmark
evaluation with leaderboard, and market integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 07:36:34 -04:00
parent fc1f371c8c
commit 4c1c0e9ff2
24 changed files with 2157 additions and 357 deletions
@@ -23,6 +23,66 @@ export const CAPABILITY_FORMULA = {
efficiencyWeight: 0.1,
};
// Sizing constants for the three training stages (pre-training, SFT, alignment).
// The engine code that consumes them is not visible here, so the notes below are
// read from the names and values only — confirm against the training engine.
/** Baseline pre-training length, in game ticks. */
export const PRETRAINING_BASE_TICKS = 180;
/** SFT duration/compute as a fraction; presumably relative to the pre-training stage — TODO confirm. */
export const SFT_TIME_FRACTION = 0.10;
export const SFT_COMPUTE_FRACTION = 0.06;
/** Alignment duration/compute as a fraction; presumably relative to the pre-training stage — TODO confirm. */
export const ALIGNMENT_TIME_FRACTION = 0.08;
export const ALIGNMENT_COMPUTE_FRACTION = 0.04;
/** Target ratio for the Chinchilla scaling rule; presumably training tokens per parameter — TODO confirm units. */
export const CHINCHILLA_OPTIMAL_RATIO = 20;
/**
 * Cap on simultaneously running training jobs, looked up by a string key.
 * NOTE(review): the keys look like era identifiers, but the map is typed
 * Record<string, number>, so a lookup with an unknown key is not rejected at
 * compile time — confirm callers handle a missing entry.
 */
export const MAX_CONCURRENT_TRAINING: Record<string, number> = {
startup: 1, scaleup: 2, bigtech: 4, agi: 8,
};
// Tuning values for creating model variants (distillation, fine-tuning, quantization)
// and for mixture-of-experts architectures. How the engine applies them is not visible
// here; the per-constant notes are inferred from names — confirm before relying on them.
/** Distillation cost/duration fractions; presumably relative to the base model's training — TODO confirm. */
export const DISTILLATION_COMPUTE_FRACTION = 0.15;
export const DISTILLATION_TIME_FRACTION = 0.20;
/** Baseline share of capability a distilled variant keeps (0.70 = 70%) — presumably adjusted further by the engine. */
export const DISTILLATION_BASE_RETENTION = 0.70;
/** Fine-tuning cost/duration fractions; presumably relative to the base model's training — TODO confirm. */
export const FINETUNE_COMPUTE_FRACTION = 0.03;
export const FINETUNE_TIME_FRACTION = 0.08;
/** Fixed duration of a quantization job, in ticks. */
export const QUANTIZATION_TICKS = 8;
/** Multipliers associated with the 'moe' architecture type. */
export const MOE_CAPABILITY_MULTIPLIER = 1.15;
export const MOE_SPEED_MULTIPLIER = 1.3;
/** Selectable parameter counts; units are not stated here (presumably billions) — TODO confirm. */
export const PARAMETER_OPTIONS = [1, 3, 7, 13, 30, 70, 130, 300, 700, 1400];
/** Selectable context-window sizes; units are not stated here (presumably thousands of tokens) — TODO confirm. */
export const CONTEXT_WINDOW_OPTIONS = [4, 8, 32, 128, 256, 1024];
// Parameters for random training events. Each MIN/MAX pair bounds a range; whether
// the bounds are inclusive and how values are sampled is decided by the engine (not shown).
/** Base chance of a training event; presumably evaluated once per tick per job — TODO confirm. */
export const EVENT_BASE_PROBABILITY = 0.001;
/** Range for the delay caused by a loss spike; presumably in ticks. */
export const LOSS_SPIKE_DELAY_MIN = 5;
export const LOSS_SPIKE_DELAY_MAX = 20;
/** Range for progress lost to an instability event, expressed as a fraction (0.05–0.15). */
export const INSTABILITY_PROGRESS_LOSS_MIN = 0.05;
export const INSTABILITY_PROGRESS_LOSS_MAX = 0.15;
/** Range for the capability bonus granted by a breakthrough event. */
export const BREAKTHROUGH_CAPABILITY_BONUS_MIN = 5;
export const BREAKTHROUGH_CAPABILITY_BONUS_MAX = 15;
/**
 * Maps a numeric threshold to the identifier of the emergent capability tied to it.
 * NOTE(review): the unit of the threshold is not stated here; the values fall within
 * PARAMETER_OPTIONS, so it is presumably a parameter count — confirm against the engine.
 */
export const EMERGENT_CAPABILITY_THRESHOLDS: Record<number, string> = {
10: 'basic-reasoning',
50: 'chain-of-thought',
100: 'tool-use',
500: 'long-horizon-planning',
};
/**
 * Per-precision trade-offs for quantized variants. The keys mirror the members of the
 * QuantizationLevel union (fp16, int8, int4, int2). fp16 is the neutral entry (all
 * factors 1.0); lower precisions lower qualityRetention and costMultiplier while
 * raising speedMultiplier.
 */
export const QUANTIZATION_CONFIGS: Record<string, { qualityRetention: number; speedMultiplier: number; costMultiplier: number }> = {
fp16: { qualityRetention: 1.00, speedMultiplier: 1.0, costMultiplier: 1.0 },
int8: { qualityRetention: 0.97, speedMultiplier: 1.8, costMultiplier: 0.55 },
int4: { qualityRetention: 0.90, speedMultiplier: 3.0, costMultiplier: 0.30 },
int2: { qualityRetention: 0.75, speedMultiplier: 5.0, costMultiplier: 0.15 },
};
/**
 * Safety/capability trade-off table for each alignment method. The keys mirror the
 * AlignmentMethod union (rlhf, dpo, constitutional).
 * - safetyGain / capabilityLoss: amounts applied by the engine (scale not shown here).
 * - baseRefusal: starting refusal rate, expressed as a fraction.
 * - requiredResearch: identifier of the research item gating the method — presumably
 *   matched against the research tree; verify the ids exist there.
 */
export const ALIGNMENT_METHODS: Record<string, { safetyGain: number; capabilityLoss: number; baseRefusal: number; requiredResearch: string }> = {
rlhf: { safetyGain: 25, capabilityLoss: 5, baseRefusal: 0.10, requiredResearch: 'alignment-research' },
dpo: { safetyGain: 20, capabilityLoss: 2, baseRefusal: 0.05, requiredResearch: 'interpretability' },
constitutional: { safetyGain: 30, capabilityLoss: 4, baseRefusal: 0.14, requiredResearch: 'constitutional-ai' },
};
/**
 * Capability adjustments per SFT specialization. Outer keys mirror the SFTSpecialization
 * union; inner keys mirror the fields of ModelCapabilities. Every row lists all nine
 * capability fields explicitly, using 0 for "no change" and negative numbers for the
 * capabilities a specialization trades away.
 * NOTE(review): how rows combine when several specializations are selected is decided
 * by the engine and is not visible here.
 */
export const SFT_SPECIALIZATION_BONUSES: Record<string, Record<string, number>> = {
general: { reasoning: 5, coding: 5, creative: 5, math: 5, knowledge: 5, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
code: { reasoning: 0, coding: 15, creative: -3, math: 8, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
math: { reasoning: 8, coding: 0, creative: -3, math: 15, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
creative: { reasoning: 0, coding: -3, creative: 15, math: 0, knowledge: 5, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
multilingual: { reasoning: 0, coding: 0, creative: 0, math: 0, knowledge: 10, multimodal: 0, agents: 0, speed: -5, contextUtilization: 0 },
'tool-use': { reasoning: 0, coding: 8, creative: 0, math: 0, knowledge: 0, multimodal: 0, agents: 15, speed: -5, contextUtilization: 0 },
};
// Consumer-market constants. The market code that reads them is not visible here;
// the descriptions below are inferred from the names — confirm before relying on them.
/** Baseline consumer growth rate; presumably applied per tick. */
export const CONSUMER_BASE_GROWTH = 0.002;
/** Scales the contribution of model quality to consumer growth. */
export const CONSUMER_QUALITY_GROWTH_MULTIPLIER = 0.01;
/** Price elasticity of consumer demand; the value is negative. */
export const CONSUMER_PRICE_ELASTICITY = -0.5;
+1 -1
View File
@@ -58,4 +58,4 @@ export const INITIAL_SETTINGS: GameSettings = {
sfxVolume: 0.7,
};
export const SAVE_VERSION = 5;
export const SAVE_VERSION = 6;
+247 -39
View File
@@ -1,51 +1,239 @@
export interface ModelsState {
trainedModels: TrainedModel[];
activeTraining: TrainingJob | null;
productLines: ProductLine[];
import type { Era } from './gameState';
import type { DataDomain } from './data';
/** Network layout: a dense model or a mixture-of-experts ('moe') model. */
export type ArchitectureType = 'dense' | 'moe';
/**
 * Structural description of a model. Parameter counts are split into total and
 * active figures; the expert fields are optional.
 */
export interface ModelArchitecture {
type: ArchitectureType;
totalParameters: number;
// For 'moe' this presumably is the subset of parameters used per token — TODO confirm.
activeParameters: number;
// Optional; presumably only populated when type is 'moe' (not enforced by the type).
expertCount?: number;
expertTopK?: number;
contextWindow: number;
vocabularySize: number;
}
export interface TrainedModel {
/** One numeric weight per DataDomain; the default mix in this file uses fractions that sum to 1. */
export type DataMixAllocation = Record<DataDomain, number>;
/** The three stages a training pipeline can be in. */
export type TrainingStage = 'pretraining' | 'sft' | 'alignment';
/** Lifecycle state of a training job. */
export type TrainingJobStatus = 'active' | 'paused' | 'stalled' | 'completed' | 'failed';
/**
 * An in-flight multi-stage training job: it records the architecture and data mix
 * being trained, the stage currently running, per-stage configuration/progress,
 * overall status, and the events that occurred during training.
 *
 * NOTE(review): the fields from `name` through `trainedAtTick` reference a finished
 * model (scores, tuning, deployment flag) rather than a job and may be leftovers from
 * the former TrainedModel shape — confirm which fields actually belong here.
 */
export interface TrainingPipeline {
id: string;
name: string;
generation: number;
parameterCount: number;
trainingDataSize: number;
capabilities: ModelCapabilities;
safetyScore: number;
benchmarkScore: number;
tuning: ModelTuning;
isDeployed: boolean;
trainedAtTick: number;
familyId: string;
modelName: string;
architecture: ModelArchitecture;
dataMix: DataMixAllocation;
currentStage: TrainingStage;
// Pre-training is always present; SFT and alignment are null when not configured.
stages: {
pretraining: PreTrainingConfig;
sft: SFTConfig | null;
alignment: AlignmentConfig | null;
};
status: TrainingJobStatus;
allocatedComputeFraction: number;
events: TrainingEvent[];
startedAtTick: number;
}
/**
 * Configuration and running progress of the pre-training stage: token target versus
 * tokens processed, tick progress versus total ticks, the current loss value, and the
 * job's Chinchilla ratio.
 */
export interface PreTrainingConfig {
targetTokens: number;
processedTokens: number;
computeAllocated: number;
progressTicks: number;
totalTicks: number;
lossValue: number;
// Presumably tokens per parameter for this run, compared with CHINCHILLA_OPTIMAL_RATIO — TODO confirm.
chinchillaRatio: number;
isComplete: boolean;
}
/** Focus areas selectable for supervised fine-tuning. */
export type SFTSpecialization = 'general' | 'code' | 'math' | 'creative' | 'multilingual' | 'tool-use';
/** Configuration and tick progress of the SFT stage; several specializations may be selected at once. */
export interface SFTConfig {
specializations: SFTSpecialization[];
progressTicks: number;
totalTicks: number;
isComplete: boolean;
}
/** Supported alignment techniques. */
export type AlignmentMethod = 'rlhf' | 'dpo' | 'constitutional';
/**
 * Configuration and tick progress of the alignment stage.
 * NOTE(review): the valid range of the two weights, and whether they must sum to a
 * fixed value, is not visible here.
 */
export interface AlignmentConfig {
method: AlignmentMethod;
safetyWeight: number;
helpfulnessWeight: number;
progressTicks: number;
totalTicks: number;
isComplete: boolean;
}
/** Kinds of events that can occur during a training run. */
export type TrainingEventType =
| 'loss_spike'
| 'instability'
| 'breakthrough'
| 'emergent_capability'
| 'data_contamination'
| 'hardware_failure';
/**
 * A single event recorded on a training pipeline, with the tick it happened on,
 * a severity, a human-readable description, and its effect.
 */
export interface TrainingEvent {
id: string;
type: TrainingEventType;
tick: number;
severity: 'minor' | 'moderate' | 'major';
description: string;
resolved: boolean;
// Every impact field is optional; which ones are set presumably depends on `type`.
impact: {
ticksDelayed?: number;
progressLost?: number;
capabilityBonus?: number;
// Names the ModelCapabilities field that capabilityBonus applies to.
capabilityDomain?: keyof ModelCapabilities;
};
}
/**
 * Per-domain capability scores of a model. The field names are also used as keys
 * elsewhere (event impact domains, benchmark primary/secondary capability), so
 * renaming a field affects those lookups. The score scale is not stated here.
 */
export interface ModelCapabilities {
reasoning: number;
coding: number;
creative: number;
math: number;
knowledge: number;
multimodal: number;
agents: number;
speed: number;
contextUtilization: number;
}
export interface ModelTuning {
preset: TuningPreset;
verbosity?: number;
safetyLevel?: number;
creativity?: number;
speedQuality?: number;
refusalRate?: number;
/**
 * Safety-related measurements of a model: an overall score plus individual facets.
 * NOTE(review): the scale of each field is not stated here; refusalRate is presumably
 * a fraction, in line with the baseRefusal values of the alignment methods — confirm.
 */
export interface SafetyProfile {
overallSafety: number;
refusalRate: number;
harmAvoidance: number;
instructionFollowing: number;
honesty: number;
}
export type TuningPreset = 'helpful-safe' | 'max-capability' | 'enterprise' | 'creative';
/**
 * A fully trained model belonging to a family. Stores what it was trained with
 * (architecture, data mix, stages completed, total cost), what it can do
 * (capabilities, safety profile, raw capability) and whether it is deployed.
 */
export interface BaseModel {
id: string;
// Id of the ModelFamily this model belongs to.
familyId: string;
name: string;
architecture: ModelArchitecture;
dataMix: DataMixAllocation;
capabilities: ModelCapabilities;
safetyProfile: SafetyProfile;
// Single aggregate capability number; how it is derived is not visible here.
rawCapability: number;
isDeployed: boolean;
trainedAtTick: number;
trainingCostTotal: number;
trainingStagesCompleted: TrainingStage[];
}
export interface TrainingJob {
modelName: string;
/** How a variant was derived from its base model. */
export type VariantType = 'distilled' | 'fine-tuned' | 'quantized';
/** Numeric precision levels available for quantization. */
export type QuantizationLevel = 'fp16' | 'int8' | 'int4' | 'int2';
/**
 * A model derived from a base model within the same family. Carries its own
 * architecture, capabilities and safety profile plus cost and speed multipliers.
 */
export interface ModelVariant {
id: string;
familyId: string;
// Id of the BaseModel this variant was derived from.
baseModelId: string;
name: string;
variantType: VariantType;
architecture: ModelArchitecture;
capabilities: ModelCapabilities;
safetyProfile: SafetyProfile;
isDeployed: boolean;
createdAtTick: number;
// The three optional fields below presumably apply to one variantType each
// (quantized / distilled / fine-tuned); the type does not enforce that pairing.
quantization?: QuantizationLevel;
distillationRetention?: number;
finetuneSpecialization?: SFTSpecialization;
costMultiplier: number;
speedMultiplier: number;
}
/**
 * Groups a base model (null until one exists) with the variants derived from it
 * under a shared name and generation number.
 *
 * NOTE(review): allocatedCompute and allocatedDataTokens describe a job's resource
 * allocation rather than a family and may be leftovers from the former TrainingJob
 * shape — confirm whether they belong on this interface.
 */
export interface ModelFamily {
id: string;
name: string;
generation: number;
allocatedCompute: number;
allocatedDataTokens: number;
baseModelId: string | null;
variants: ModelVariant[];
createdAtTick: number;
}
/** The kinds of variant-creation work that can be queued. */
export type VariantJobType = 'distillation' | 'fine-tuning' | 'quantization';
/**
 * A queued or finished job that produces a ModelVariant from a base model,
 * tracked by tick progress and a share of compute.
 */
export interface VariantCreationJob {
id: string;
familyId: string;
baseModelId: string;
jobType: VariantJobType;
// NOTE(review): the config shape is expected to correspond to jobType, but the type
// does not tie the two together; consumers must narrow manually.
config: DistillationConfig | FineTuneConfig | QuantizationConfig;
progressTicks: number;
totalTicks: number;
estimatedCapability: number;
allocatedComputeFraction: number;
status: 'active' | 'completed';
}
/** Settings for a distillation job: the size and architecture of the target model and the variant's name. */
export interface DistillationConfig {
targetParameters: number;
targetArchitecture: ArchitectureType;
variantName: string;
}
/** Settings for a fine-tuning job: the specialization, the ids of the datasets to use, and the variant's name. */
export interface FineTuneConfig {
specialization: SFTSpecialization;
datasetIds: string[];
variantName: string;
}
/** Settings for a quantization job: the target precision level and the variant's name. */
export interface QuantizationConfig {
level: QuantizationLevel;
variantName: string;
}
/** Subject areas a benchmark can belong to. */
export type BenchmarkCategory = 'reasoning' | 'coding' | 'math' | 'knowledge' | 'safety' | 'chat' | 'multimodal' | 'agents';
/**
 * Static description of a named benchmark: what it measures, what it costs to run,
 * when it becomes available, and how much each market segment cares about it.
 */
export interface BenchmarkDefinition {
id: string;
name: string;
category: BenchmarkCategory;
description: string;
// ModelCapabilities fields the benchmark draws on; the secondary one is optional.
primaryCapability: keyof ModelCapabilities;
secondaryCapability?: keyof ModelCapabilities;
computeCost: number;
ticksToRun: number;
unlockedAtEra: Era;
// Per-segment weight of this benchmark; the scale is not stated here.
marketRelevance: {
consumer: number;
enterprise: number;
developer: number;
research: number;
};
}
/** The score one model obtained on one benchmark, with the tick it was measured and an optional leaderboard rank. */
export interface BenchmarkResult {
benchmarkId: string;
modelId: string;
score: number;
ranAtTick: number;
rank?: number;
}
/**
 * An evaluation run of one model against a list of benchmarks. Tracks tick progress,
 * the compute allocated to the run, and the results collected for it.
 */
export interface EvalJob {
id: string;
modelId: string;
benchmarkIds: string[];
progressTicks: number;
totalTicks: number;
computeAllocated: number;
status: 'active' | 'completed';
results: BenchmarkResult[];
}
/** Identifiers of the product lines a model can be offered through. */
export type ProductLineType = 'text-api' | 'chat-product' | 'chat-free' | 'chat-enterprise' | 'code-api' | 'image' | 'agents-api';
/**
 * Pricing settings of a product line: per-token prices, a thinking-token budget,
 * a caching toggle, a subscription price and a free token allowance.
 * Currency and token units are not stated here.
 */
export interface ProductPricing {
inputTokenPrice: number;
outputTokenPrice: number;
thinkingTokenBudget: number;
cachingEnabled: boolean;
subscriptionPrice: number;
freeTokenAllowance: number;
}
export interface ProductLine {
@@ -57,20 +245,38 @@ export interface ProductLine {
pricing: ProductPricing;
}
export type ProductLineType = 'text-api' | 'chat-product' | 'image' | 'code' | 'agents';
export interface ProductPricing {
inputTokenPrice: number;
outputTokenPrice: number;
thinkingTokenBudget: number;
cachingEnabled: boolean;
subscriptionPrice: number;
freeTokenAllowance: number;
/**
 * Root state slice for the model system: model families and their base models,
 * all in-flight work (training pipelines, variant jobs, eval jobs), collected
 * benchmark results, product lines, and the best scores among deployed models.
 */
export interface ModelsState {
families: ModelFamily[];
baseModels: BaseModel[];
// Several pipelines may run at once, hence an array.
activeTrainingPipelines: TrainingPipeline[];
variantJobs: VariantCreationJob[];
evalJobs: EvalJob[];
benchmarkResults: BenchmarkResult[];
productLines: ProductLine[];
// Both start at 0 in the initial state; presumably refreshed by the engine as models are deployed.
bestDeployedModelScore: number;
bestDeployedSafetyScore: number;
}
/**
 * Default training data mix: one fraction per data domain. The ten fractions add up
 * to 1.00, so keep the total at 1 when adjusting any single entry.
 */
export const DEFAULT_DATA_MIX: DataMixAllocation = {
web: 0.35,
books: 0.10,
code: 0.15,
scientific: 0.10,
conversation: 0.10,
multilingual: 0.05,
images: 0.05,
video: 0.03,
audio: 0.02,
synthetic: 0.05,
};
export const INITIAL_MODELS: ModelsState = {
trainedModels: [],
activeTraining: null,
families: [],
baseModels: [],
activeTrainingPipelines: [],
variantJobs: [],
evalJobs: [],
benchmarkResults: [],
productLines: [
{
id: 'text-api',
@@ -103,4 +309,6 @@ export const INITIAL_MODELS: ModelsState = {
},
},
],
bestDeployedModelScore: 0,
bestDeployedSafetyScore: 0,
};