Overhaul model system with multi-stage training, variants, benchmarks, and eval
CI / build-and-push (push) Successful in 32s
CI / build-and-push (push) Successful in 32s
Replace the single-stage training + flat capability score with a realistic AI development pipeline: pre-training with Chinchilla scaling laws, SFT with specializations, alignment with safety/capability tradeoffs (RLHF/DPO/Constitutional), model families with distillation/fine-tuning/quantization variants, named benchmark suite with compute-costing eval jobs, and segment-specific market quality. Phases 1-6 of the model rework plan: new types, engine rewrite, save migration, training events/risk system, concurrent training, variant creation, benchmark evaluation with leaderboard, and market integration. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -23,6 +23,66 @@ export const CAPABILITY_FORMULA = {
|
||||
efficiencyWeight: 0.1,
|
||||
};
|
||||
|
||||
// Multi-stage training tuning constants.
// NOTE(review): the code that consumes these values is not in this file; the
// reference quantities mentioned below are inferred from the names — confirm.

/** Base duration of the pre-training stage, in game ticks. */
export const PRETRAINING_BASE_TICKS = 180;

/** Duration of the SFT stage as a fraction (presumably of pre-training time — confirm). */
export const SFT_TIME_FRACTION = 0.10;

/** Compute cost of the SFT stage as a fraction (presumably of pre-training compute — confirm). */
export const SFT_COMPUTE_FRACTION = 0.06;

/** Duration of the alignment stage as a fraction (presumably of pre-training time — confirm). */
export const ALIGNMENT_TIME_FRACTION = 0.08;

/** Compute cost of the alignment stage as a fraction (presumably of pre-training compute — confirm). */
export const ALIGNMENT_COMPUTE_FRACTION = 0.04;

/**
 * Target data-to-parameter ratio. 20 matches the commonly cited Chinchilla
 * figure of roughly 20 training tokens per parameter (assumed unit — confirm).
 */
export const CHINCHILLA_OPTIMAL_RATIO = 20;
|
||||
|
||||
/**
 * Upper bound on simultaneously running training jobs, looked up by a string key.
 * NOTE(review): the keys look like era identifiers (`Era` from the game-state
 * types) — confirm; the map is typed `Record<string, number>`, so a lookup with
 * an unknown key yields `undefined` at runtime.
 */
export const MAX_CONCURRENT_TRAINING: Record<string, number> = {
  startup: 1,
  scaleup: 2,
  bigtech: 4,
  agi: 8,
};
|
||||
|
||||
// Variant-creation (distillation / fine-tuning / quantization) tuning constants.
// NOTE(review): what each fraction is relative to is not visible here —
// presumably the base model's training compute / duration; confirm.

/** Compute cost of a distillation job, as a fraction. */
export const DISTILLATION_COMPUTE_FRACTION = 0.15;

/** Duration of a distillation job, as a fraction. */
export const DISTILLATION_TIME_FRACTION = 0.20;

/** Baseline share of capability a distilled variant keeps (0.70 = 70%, assumed — confirm). */
export const DISTILLATION_BASE_RETENTION = 0.70;

/** Compute cost of a fine-tuning job, as a fraction. */
export const FINETUNE_COMPUTE_FRACTION = 0.03;

/** Duration of a fine-tuning job, as a fraction. */
export const FINETUNE_TIME_FRACTION = 0.08;

/** Fixed duration of a quantization job, in game ticks. */
export const QUANTIZATION_TICKS = 8;
|
||||
|
||||
/** Capability multiplier applied for mixture-of-experts architectures. */
export const MOE_CAPABILITY_MULTIPLIER = 1.15;

/** Speed multiplier applied for mixture-of-experts architectures. */
export const MOE_SPEED_MULTIPLIER = 1.3;

/**
 * Selectable model sizes, in ascending order.
 * NOTE(review): presumably billions of parameters — confirm the unit.
 */
export const PARAMETER_OPTIONS = [1, 3, 7, 13, 30, 70, 130, 300, 700, 1400];

/**
 * Selectable context-window sizes, in ascending order.
 * NOTE(review): presumably thousands of tokens — confirm the unit.
 */
export const CONTEXT_WINDOW_OPTIONS = [4, 8, 32, 128, 256, 1024];
|
||||
|
||||
// Training event / risk tuning constants. Each MIN/MAX pair bounds a range;
// how a value is drawn from the range is decided by the consuming code (not shown here).

/** Base probability of a training event (presumably per tick per job — confirm). */
export const EVENT_BASE_PROBABILITY = 0.001;

/** Smallest delay caused by a loss spike (presumably in ticks — confirm). */
export const LOSS_SPIKE_DELAY_MIN = 5;

/** Largest delay caused by a loss spike (presumably in ticks — confirm). */
export const LOSS_SPIKE_DELAY_MAX = 20;

/** Smallest share of progress lost to an instability event. */
export const INSTABILITY_PROGRESS_LOSS_MIN = 0.05;

/** Largest share of progress lost to an instability event. */
export const INSTABILITY_PROGRESS_LOSS_MAX = 0.15;

/** Smallest capability bonus granted by a breakthrough event. */
export const BREAKTHROUGH_CAPABILITY_BONUS_MIN = 5;

/** Largest capability bonus granted by a breakthrough event. */
export const BREAKTHROUGH_CAPABILITY_BONUS_MAX = 15;
|
||||
|
||||
/**
 * Maps a numeric threshold to the id of the emergent capability associated with it.
 * NOTE(review): the threshold unit is not visible here — presumably model size
 * on the same scale as `PARAMETER_OPTIONS`; confirm against the training engine.
 */
export const EMERGENT_CAPABILITY_THRESHOLDS: Record<number, string> = {
  10: 'basic-reasoning',
  50: 'chain-of-thought',
  100: 'tool-use',
  500: 'long-horizon-planning',
};
|
||||
|
||||
/**
 * Per-precision trade-offs for quantized variants, keyed by quantization level.
 *
 * - `qualityRetention`: share of quality kept (1.00 = no loss).
 * - `speedMultiplier`: speed factor relative to the `fp16` row.
 * - `costMultiplier`: cost factor relative to the `fp16` row.
 *
 * In the table as written, lower precision trades quality for higher speed and lower cost.
 */
export const QUANTIZATION_CONFIGS: Record<string, { qualityRetention: number; speedMultiplier: number; costMultiplier: number }> = {
  fp16: { qualityRetention: 1.00, speedMultiplier: 1.0, costMultiplier: 1.0 },
  int8: { qualityRetention: 0.97, speedMultiplier: 1.8, costMultiplier: 0.55 },
  int4: { qualityRetention: 0.90, speedMultiplier: 3.0, costMultiplier: 0.30 },
  int2: { qualityRetention: 0.75, speedMultiplier: 5.0, costMultiplier: 0.15 },
};
|
||||
|
||||
/**
 * Parameters of each alignment method, keyed by method id.
 *
 * - `safetyGain` / `capabilityLoss`: the safety/capability trade-off of the method.
 * - `baseRefusal`: baseline refusal rate as a fraction (assumed 0-1 — confirm).
 * - `requiredResearch`: id of the research item associated with the method
 *   (presumably a prerequisite for using it — confirm against the research system).
 */
export const ALIGNMENT_METHODS: Record<string, { safetyGain: number; capabilityLoss: number; baseRefusal: number; requiredResearch: string }> = {
  rlhf: { safetyGain: 25, capabilityLoss: 5, baseRefusal: 0.10, requiredResearch: 'alignment-research' },
  dpo: { safetyGain: 20, capabilityLoss: 2, baseRefusal: 0.05, requiredResearch: 'interpretability' },
  constitutional: { safetyGain: 30, capabilityLoss: 4, baseRefusal: 0.14, requiredResearch: 'constitutional-ai' },
};
|
||||
|
||||
/**
 * Capability adjustments for each SFT specialization.
 *
 * Outer key: specialization id. Inner key: capability domain. Value: the
 * adjustment for that domain — positive values are bonuses, negative values
 * penalties. Every row lists all nine capability domains explicitly, using 0
 * for "no effect".
 * NOTE(review): whether these are added to or otherwise combined with the base
 * capability scores is decided by the consuming code, which is not shown here.
 */
export const SFT_SPECIALIZATION_BONUSES: Record<string, Record<string, number>> = {
  general: { reasoning: 5, coding: 5, creative: 5, math: 5, knowledge: 5, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
  code: { reasoning: 0, coding: 15, creative: -3, math: 8, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
  math: { reasoning: 8, coding: 0, creative: -3, math: 15, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
  creative: { reasoning: 0, coding: -3, creative: 15, math: 0, knowledge: 5, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
  multilingual: { reasoning: 0, coding: 0, creative: 0, math: 0, knowledge: 10, multimodal: 0, agents: 0, speed: -5, contextUtilization: 0 },
  'tool-use': { reasoning: 0, coding: 8, creative: 0, math: 0, knowledge: 0, multimodal: 0, agents: 15, speed: -5, contextUtilization: 0 },
};
|
||||
|
||||
// Consumer-market tuning constants. The formulas that use them are not in this file.

/** Baseline consumer growth rate (presumably per tick — confirm). */
export const CONSUMER_BASE_GROWTH = 0.002;

/** Scales how strongly model quality contributes to consumer growth. */
export const CONSUMER_QUALITY_GROWTH_MULTIPLIER = 0.01;

/** Price elasticity for the consumer segment; the value is negative. */
export const CONSUMER_PRICE_ELASTICITY = -0.5;
|
||||
|
||||
@@ -58,4 +58,4 @@ export const INITIAL_SETTINGS: GameSettings = {
|
||||
sfxVolume: 0.7,
|
||||
};
|
||||
|
||||
/**
 * Version number of the save-game format.
 * The previous value was 5; this change bumps it to 6 (the accompanying commit
 * introduces a save migration for the reworked model system).
 */
export const SAVE_VERSION = 6;
|
||||
|
||||
@@ -1,51 +1,239 @@
|
||||
export interface ModelsState {
|
||||
trainedModels: TrainedModel[];
|
||||
activeTraining: TrainingJob | null;
|
||||
productLines: ProductLine[];
|
||||
import type { Era } from './gameState';
|
||||
import type { DataDomain } from './data';
|
||||
|
||||
/** Supported network architectures: a dense model or a mixture-of-experts (`moe`) model. */
export type ArchitectureType = 'dense' | 'moe';

/**
 * Architectural description of a model.
 * NOTE(review): units of the numeric fields are not defined in this file.
 */
export interface ModelArchitecture {
  type: ArchitectureType;
  /** Total parameter count of the model. */
  totalParameters: number;
  /** Parameter count that is active (presumably per token; lower than the total for `moe` — confirm). */
  activeParameters: number;
  /** Number of experts; optional (presumably only set for `moe` — confirm). */
  expertCount?: number;
  /** Experts selected per routing step; optional (presumably only set for `moe` — confirm). */
  expertTopK?: number;
  contextWindow: number;
  vocabularySize: number;
}
|
||||
|
||||
export interface TrainedModel {
|
||||
/** A numeric allocation for every `DataDomain` (the domain type is imported from `./data`). */
export type DataMixAllocation = Record<DataDomain, number>;
|
||||
|
||||
/** Identifier of a training stage. */
export type TrainingStage =
  | 'pretraining'
  | 'sft'
  | 'alignment';
|
||||
/** Lifecycle status of a training job. */
export type TrainingJobStatus =
  | 'active'
  | 'paused'
  | 'stalled'
  | 'completed'
  | 'failed';
|
||||
|
||||
export interface TrainingPipeline {
|
||||
id: string;
|
||||
name: string;
|
||||
generation: number;
|
||||
parameterCount: number;
|
||||
trainingDataSize: number;
|
||||
capabilities: ModelCapabilities;
|
||||
safetyScore: number;
|
||||
benchmarkScore: number;
|
||||
tuning: ModelTuning;
|
||||
isDeployed: boolean;
|
||||
trainedAtTick: number;
|
||||
familyId: string;
|
||||
modelName: string;
|
||||
architecture: ModelArchitecture;
|
||||
dataMix: DataMixAllocation;
|
||||
currentStage: TrainingStage;
|
||||
stages: {
|
||||
pretraining: PreTrainingConfig;
|
||||
sft: SFTConfig | null;
|
||||
alignment: AlignmentConfig | null;
|
||||
};
|
||||
status: TrainingJobStatus;
|
||||
allocatedComputeFraction: number;
|
||||
events: TrainingEvent[];
|
||||
startedAtTick: number;
|
||||
}
|
||||
|
||||
/**
 * Configuration and progress of the pre-training stage.
 * Holds both the targets (`targetTokens`, `totalTicks`) and the running
 * counters (`processedTokens`, `progressTicks`) for the stage.
 */
export interface PreTrainingConfig {
  targetTokens: number;
  processedTokens: number;
  computeAllocated: number;
  /** Ticks of progress made so far, out of `totalTicks`. */
  progressTicks: number;
  totalTicks: number;
  /** Current training loss. */
  lossValue: number;
  /** Data-to-parameter ratio of this run (presumably compared against the Chinchilla-optimal ratio — confirm). */
  chinchillaRatio: number;
  isComplete: boolean;
}
|
||||
|
||||
/** Specializations that can be selected for supervised fine-tuning (SFT). */
export type SFTSpecialization = 'general' | 'code' | 'math' | 'creative' | 'multilingual' | 'tool-use';

/** Configuration and progress of the SFT stage. */
export interface SFTConfig {
  /** Specializations selected for this stage; the type allows more than one. */
  specializations: SFTSpecialization[];
  /** Ticks of progress made so far, out of `totalTicks`. */
  progressTicks: number;
  totalTicks: number;
  isComplete: boolean;
}
|
||||
|
||||
/** Alignment techniques that can be selected for the alignment stage. */
export type AlignmentMethod = 'rlhf' | 'dpo' | 'constitutional';

/** Configuration and progress of the alignment stage. */
export interface AlignmentConfig {
  method: AlignmentMethod;
  /** Weight given to safety (range and normalisation not defined here — confirm). */
  safetyWeight: number;
  /** Weight given to helpfulness (range and normalisation not defined here — confirm). */
  helpfulnessWeight: number;
  /** Ticks of progress made so far, out of `totalTicks`. */
  progressTicks: number;
  totalTicks: number;
  isComplete: boolean;
}
|
||||
|
||||
/** Kinds of event that can be recorded against a training pipeline. */
export type TrainingEventType =
  | 'loss_spike'
  | 'instability'
  | 'breakthrough'
  | 'emergent_capability'
  | 'data_contamination'
  | 'hardware_failure';
|
||||
|
||||
/** A single event that occurred during training, together with its effect. */
export interface TrainingEvent {
  id: string;
  type: TrainingEventType;
  /** Game tick associated with the event (presumably when it occurred — confirm). */
  tick: number;
  severity: 'minor' | 'moderate' | 'major';
  description: string;
  resolved: boolean;
  /**
   * Effect of the event. Every field is optional; which ones are populated
   * presumably depends on `type` — the type system does not enforce this.
   */
  impact: {
    ticksDelayed?: number;
    progressLost?: number;
    capabilityBonus?: number;
    /** Capability domain the bonus applies to. */
    capabilityDomain?: keyof ModelCapabilities;
  };
}
|
||||
|
||||
/**
 * Per-domain capability scores of a model.
 * NOTE(review): the score scale is not defined in this file.
 */
export interface ModelCapabilities {
  reasoning: number;
  coding: number;
  creative: number;
  math: number;
  knowledge: number;
  multimodal: number;
  agents: number;
  speed: number;
  contextUtilization: number;
}
|
||||
|
||||
export interface ModelTuning {
|
||||
preset: TuningPreset;
|
||||
verbosity?: number;
|
||||
safetyLevel?: number;
|
||||
creativity?: number;
|
||||
speedQuality?: number;
|
||||
refusalRate?: number;
|
||||
/**
 * Safety-related measurements of a model.
 * NOTE(review): value ranges are not defined in this file.
 */
export interface SafetyProfile {
  overallSafety: number;
  refusalRate: number;
  harmAvoidance: number;
  instructionFollowing: number;
  honesty: number;
}
|
||||
|
||||
export type TuningPreset = 'helpful-safe' | 'max-capability' | 'enterprise' | 'creative';
|
||||
/** A trained base model belonging to a model family. */
export interface BaseModel {
  id: string;
  /** Id of the `ModelFamily` this model belongs to. */
  familyId: string;
  name: string;
  architecture: ModelArchitecture;
  dataMix: DataMixAllocation;
  capabilities: ModelCapabilities;
  safetyProfile: SafetyProfile;
  /** Single aggregate capability figure (how it is derived is not defined here). */
  rawCapability: number;
  isDeployed: boolean;
  trainedAtTick: number;
  trainingCostTotal: number;
  /** Training stages recorded as completed for this model. */
  trainingStagesCompleted: TrainingStage[];
}
|
||||
|
||||
export interface TrainingJob {
|
||||
modelName: string;
|
||||
/** Kind of a model variant. */
export type VariantType =
  | 'distilled'
  | 'fine-tuned'
  | 'quantized';
|
||||
/** Numeric precision a quantized variant can use. */
export type QuantizationLevel =
  | 'fp16'
  | 'int8'
  | 'int4'
  | 'int2';
|
||||
|
||||
/**
 * A variant of a base model (see `VariantType` for the kinds).
 * The three optional fields each relate to one variant kind; presumably only the
 * one matching `variantType` is set — the type system does not enforce this.
 */
export interface ModelVariant {
  id: string;
  familyId: string;
  /** Id of the base model this variant refers to. */
  baseModelId: string;
  name: string;
  variantType: VariantType;
  architecture: ModelArchitecture;
  capabilities: ModelCapabilities;
  safetyProfile: SafetyProfile;
  isDeployed: boolean;
  createdAtTick: number;
  quantization?: QuantizationLevel;
  distillationRetention?: number;
  finetuneSpecialization?: SFTSpecialization;
  /** Cost factor of the variant (presumably relative to its base model — confirm). */
  costMultiplier: number;
  /** Speed factor of the variant (presumably relative to its base model — confirm). */
  speedMultiplier: number;
}
|
||||
|
||||
/** A named generation of models: at most one base model reference plus its variants. */
export interface ModelFamily {
  id: string;
  name: string;
  generation: number;
  allocatedCompute: number;
  allocatedDataTokens: number;
  /** Id of the family's base model, or `null` (presumably until one has been trained — confirm). */
  baseModelId: string | null;
  variants: ModelVariant[];
  createdAtTick: number;
}
|
||||
|
||||
/** Kind of a variant-creation job. */
export type VariantJobType =
  | 'distillation'
  | 'fine-tuning'
  | 'quantization';
|
||||
|
||||
/** A job that creates a model variant from a base model. */
export interface VariantCreationJob {
  id: string;
  familyId: string;
  baseModelId: string;
  jobType: VariantJobType;
  /**
   * Job-specific settings.
   * NOTE(review): `jobType` and `config` are typed independently, so the
   * compiler does not tie the config shape to the job type; consumers must
   * narrow it themselves.
   */
  config: DistillationConfig | FineTuneConfig | QuantizationConfig;
  /** Ticks of progress made so far, out of `totalTicks`. */
  progressTicks: number;
  totalTicks: number;
  estimatedCapability: number;
  allocatedComputeFraction: number;
  status: 'active' | 'completed';
}
|
||||
|
||||
/** Settings for a distillation job: the target model's size and architecture, and the variant's name. */
export interface DistillationConfig {
  targetParameters: number;
  targetArchitecture: ArchitectureType;
  variantName: string;
}
|
||||
|
||||
/** Settings for a fine-tuning job: the specialization, the ids of the datasets to use, and the variant's name. */
export interface FineTuneConfig {
  specialization: SFTSpecialization;
  datasetIds: string[];
  variantName: string;
}
|
||||
|
||||
/** Settings for a quantization job: the target precision level and the variant's name. */
export interface QuantizationConfig {
  level: QuantizationLevel;
  variantName: string;
}
|
||||
|
||||
/** Category a benchmark is filed under. */
export type BenchmarkCategory =
  | 'reasoning'
  | 'coding'
  | 'math'
  | 'knowledge'
  | 'safety'
  | 'chat'
  | 'multimodal'
  | 'agents';
|
||||
|
||||
/** Static definition of a named benchmark. */
export interface BenchmarkDefinition {
  id: string;
  name: string;
  category: BenchmarkCategory;
  description: string;
  /** Capability domain the benchmark is mainly tied to. */
  primaryCapability: keyof ModelCapabilities;
  /** Optional second capability domain. */
  secondaryCapability?: keyof ModelCapabilities;
  /** Compute cost of running the benchmark. */
  computeCost: number;
  /** Number of ticks the benchmark takes to run. */
  ticksToRun: number;
  /** Era associated with unlocking the benchmark. */
  unlockedAtEra: Era;
  /** Relevance of the benchmark to each market segment (scale not defined here). */
  marketRelevance: {
    consumer: number;
    enterprise: number;
    developer: number;
    research: number;
  };
}
|
||||
|
||||
/** Score of one model on one benchmark. */
export interface BenchmarkResult {
  benchmarkId: string;
  modelId: string;
  score: number;
  /** Game tick at which the benchmark was run. */
  ranAtTick: number;
  /** Optional rank (presumably the leaderboard position — confirm). */
  rank?: number;
}
|
||||
|
||||
/** An evaluation job that runs a set of benchmarks against one model. */
export interface EvalJob {
  id: string;
  modelId: string;
  /** Ids of the benchmarks included in this job. */
  benchmarkIds: string[];
  /** Ticks of progress made so far, out of `totalTicks`. */
  progressTicks: number;
  totalTicks: number;
  computeAllocated: number;
  status: 'active' | 'completed';
  /** Benchmark results attached to this job. */
  results: BenchmarkResult[];
}
|
||||
|
||||
/** Kind of product line that can be offered. */
export type ProductLineType =
  | 'text-api'
  | 'chat-product'
  | 'chat-free'
  | 'chat-enterprise'
  | 'code-api'
  | 'image'
  | 'agents-api';
|
||||
|
||||
/**
 * Pricing settings of a product line.
 * NOTE(review): currency and per-unit basis of the prices are not defined in this file.
 */
export interface ProductPricing {
  inputTokenPrice: number;
  outputTokenPrice: number;
  thinkingTokenBudget: number;
  cachingEnabled: boolean;
  subscriptionPrice: number;
  freeTokenAllowance: number;
}
|
||||
|
||||
export interface ProductLine {
|
||||
@@ -57,20 +245,38 @@ export interface ProductLine {
|
||||
pricing: ProductPricing;
|
||||
}
|
||||
|
||||
export type ProductLineType = 'text-api' | 'chat-product' | 'image' | 'code' | 'agents';
|
||||
|
||||
export interface ProductPricing {
|
||||
inputTokenPrice: number;
|
||||
outputTokenPrice: number;
|
||||
thinkingTokenBudget: number;
|
||||
cachingEnabled: boolean;
|
||||
subscriptionPrice: number;
|
||||
freeTokenAllowance: number;
|
||||
/** State slice of the model system: families, models, running jobs, results and product lines. */
export interface ModelsState {
  families: ModelFamily[];
  baseModels: BaseModel[];
  /** Training pipelines held in state; an array, so several can be tracked at once. */
  activeTrainingPipelines: TrainingPipeline[];
  variantJobs: VariantCreationJob[];
  evalJobs: EvalJob[];
  benchmarkResults: BenchmarkResult[];
  productLines: ProductLine[];
  /** Cached best score among deployed models (how it is maintained is not shown here). */
  bestDeployedModelScore: number;
  /** Cached best safety score among deployed models (how it is maintained is not shown here). */
  bestDeployedSafetyScore: number;
}
|
||||
|
||||
/**
 * Default share of each data domain in a training data mix.
 * The ten fractions listed here add up to 1.00.
 */
export const DEFAULT_DATA_MIX: DataMixAllocation = {
  web: 0.35,
  books: 0.10,
  code: 0.15,
  scientific: 0.10,
  conversation: 0.10,
  multilingual: 0.05,
  images: 0.05,
  video: 0.03,
  audio: 0.02,
  synthetic: 0.05,
};
|
||||
|
||||
export const INITIAL_MODELS: ModelsState = {
|
||||
trainedModels: [],
|
||||
activeTraining: null,
|
||||
families: [],
|
||||
baseModels: [],
|
||||
activeTrainingPipelines: [],
|
||||
variantJobs: [],
|
||||
evalJobs: [],
|
||||
benchmarkResults: [],
|
||||
productLines: [
|
||||
{
|
||||
id: 'text-api',
|
||||
@@ -103,4 +309,6 @@ export const INITIAL_MODELS: ModelsState = {
|
||||
},
|
||||
},
|
||||
],
|
||||
bestDeployedModelScore: 0,
|
||||
bestDeployedSafetyScore: 0,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user