diff --git a/apps/web/src/components/dev/StateInspectionTab.tsx b/apps/web/src/components/dev/StateInspectionTab.tsx index d9a45e7..5cc6472 100644 --- a/apps/web/src/components/dev/StateInspectionTab.tsx +++ b/apps/web/src/components/dev/StateInspectionTab.tsx @@ -107,9 +107,9 @@ export function StateInspectionTab() { - - - m.isDeployed).length} /> + + p.status === 'active').length} /> + m.isDeployed).length} /> ); diff --git a/apps/web/src/components/dev/TimeCompletionTab.tsx b/apps/web/src/components/dev/TimeCompletionTab.tsx index 7e77e0b..a750d42 100644 --- a/apps/web/src/components/dev/TimeCompletionTab.tsx +++ b/apps/web/src/components/dev/TimeCompletionTab.tsx @@ -111,12 +111,17 @@ function instantCompleteResearch() { } function instantCompleteTraining() { - const { activeTraining } = useGameStore.getState().models; - if (!activeTraining) return; + const { activeTrainingPipelines } = useGameStore.getState().models; + const active = activeTrainingPipelines.find(p => p.status === 'active'); + if (!active) return; useGameStore.setState((s) => ({ models: { ...s.models, - activeTraining: { ...activeTraining, progressTicks: activeTraining.totalTicks }, + activeTrainingPipelines: s.models.activeTrainingPipelines.map(p => + p.id === active.id + ? { ...p, stages: { ...p.stages, pretraining: { ...p.stages.pretraining, progressTicks: p.stages.pretraining.totalTicks } } } + : p, + ), }, })); } @@ -137,7 +142,7 @@ function forceEra(era: Era) { export function TimeCompletionTab() { const [tickCount, setTickCount] = useState('100'); const activeResearch = useGameStore((s) => s.research.activeResearch); - const activeTraining = useGameStore((s) => s.models.activeTraining); + const activeTraining = useGameStore((s) => s.models.activeTrainingPipelines.find(p => p.status === 'active')); const currentEra = useGameStore((s) => s.meta.currentEra); const pipelineCount = useGameStore((s) => @@ -189,6 +194,7 @@ export function TimeCompletionTab() { Training {activeTraining && `(${activeTraining.modelName})`} + diff --git a/apps/web/src/components/game/CompanyStatsCard.tsx b/apps/web/src/components/game/CompanyStatsCard.tsx index a01eb4d..d88b0b9 100644 --- a/apps/web/src/components/game/CompanyStatsCard.tsx +++ b/apps/web/src/components/game/CompanyStatsCard.tsx @@ -13,10 +13,8 @@ export function CompanyStatsCard({ onClose }: { onClose: () => void }) { const totalRevenue = useGameStore((s) => s.economy.totalRevenue); const valuation = useGameStore((s) => s.economy.funding.valuation); const subscribers = useGameStore((s) => s.market.consumers.totalSubscribers); - const models = useGameStore((s) => s.models.trainedModels.length); - const bestModel = useGameStore((s) => - s.models.trainedModels.reduce((best, m) => Math.max(best, m.benchmarkScore), 0), - ); + const models = useGameStore((s) => s.models.baseModels.length); + const bestModel = useGameStore((s) => s.models.bestDeployedModelScore); const reputation = useGameStore((s) => s.reputation.score); const achievements = useGameStore((s) => s.achievements.unlocked.length); const dataCenters = useGameStore((s) => s.infrastructure.totalDataCenterCount); diff --git a/apps/web/src/pages/CompetitorsPage.tsx b/apps/web/src/pages/CompetitorsPage.tsx index 16a9178..6728e8e 100644 --- a/apps/web/src/pages/CompetitorsPage.tsx +++ b/apps/web/src/pages/CompetitorsPage.tsx @@ -22,9 +22,7 @@ const ARCHETYPE_COLORS: Record = { export function CompetitorsPage() { const rivals = useGameStore((s) => s.competitors.rivals); const industryBenchmark = useGameStore((s) => s.competitors.industryBenchmark); - const playerBest = useGameStore((s) => - s.models.trainedModels.reduce((best, m) => Math.max(best, m.benchmarkScore), 0), - ); + const playerBest = useGameStore((s) => s.models.bestDeployedModelScore); const era = useGameStore((s) => s.meta.currentEra); const money = useGameStore((s) => s.economy.money); const acquireCompetitor = useGameStore((s) => s.acquireCompetitor); diff --git a/apps/web/src/pages/DashboardPage.tsx b/apps/web/src/pages/DashboardPage.tsx index da60788..c85c8fe 100644 --- a/apps/web/src/pages/DashboardPage.tsx +++ b/apps/web/src/pages/DashboardPage.tsx @@ -13,8 +13,8 @@ export function DashboardPage() { const expensesPerTick = useGameStore((s) => s.economy.expensesPerTick); const totalFlops = useGameStore((s) => s.infrastructure.totalFlops); const totalDCs = useGameStore((s) => s.infrastructure.totalDataCenterCount); - const trainedModels = useGameStore((s) => s.models.trainedModels); - const activeTraining = useGameStore((s) => s.models.activeTraining); + const baseModels = useGameStore((s) => s.models.baseModels); + const activePipelines = useGameStore((s) => s.models.activeTrainingPipelines); const subscribers = useGameStore((s) => s.market.consumers.totalSubscribers); const reputation = useGameStore((s) => s.reputation.score); const inferenceUtil = useGameStore((s) => s.compute.inferenceUtilization); @@ -33,13 +33,13 @@ export function DashboardPage() { )} - {totalDCs > 0 && trainedModels.length === 0 && !activeTraining && ( + {totalDCs > 0 && baseModels.length === 0 && activePipelines.length === 0 && ( You have compute available! Head to the Models tab to allocate compute for training and start your first model. )} - {trainedModels.length > 0 && !trainedModels.some(m => m.isDeployed) && ( + {baseModels.length > 0 && !baseModels.some(m => m.isDeployed) && ( Your model is trained! Deploy it from the Models tab to start serving customers and earning revenue. @@ -66,8 +66,8 @@ export function DashboardPage() { p.status === 'active').length > 0 ? `Training: ${activePipelines.filter(p => p.status === 'active').length} active` : 'Idle'} color="text-purple-400" onClick={() => useGameStore.getState().setActivePage('models')} /> diff --git a/apps/web/src/pages/LeaderboardPage.tsx b/apps/web/src/pages/LeaderboardPage.tsx index 7ec6401..9ab0cc8 100644 --- a/apps/web/src/pages/LeaderboardPage.tsx +++ b/apps/web/src/pages/LeaderboardPage.tsx @@ -26,9 +26,7 @@ export function LeaderboardPage() { const totalRevenue = useGameStore((s) => s.economy.totalRevenue); const era = useGameStore((s) => s.meta.currentEra); const tickCount = useGameStore((s) => s.meta.tickCount); - const bestModel = useGameStore((s) => - s.models.trainedModels.reduce((best, m) => Math.max(best, m.benchmarkScore), 0), - ); + const bestModel = useGameStore((s) => s.models.bestDeployedModelScore); useEffect(() => { setLoading(true); diff --git a/apps/web/src/pages/MarketPage.tsx b/apps/web/src/pages/MarketPage.tsx index ad142e1..b31552d 100644 --- a/apps/web/src/pages/MarketPage.tsx +++ b/apps/web/src/pages/MarketPage.tsx @@ -38,10 +38,7 @@ export function MarketPage() { const tokensDemand = useGameStore((s) => s.compute.tokensPerSecondDemand); const currentEra = useGameStore((s) => s.meta.currentEra); const reputationScore = useGameStore((s) => s.reputation.score); - const bestQuality = useGameStore((s) => { - const deployed = s.models.trainedModels.filter(m => m.isDeployed); - return deployed.length > 0 ? Math.max(...deployed.map(m => m.benchmarkScore)) / 100 : 0; - }); + const bestQuality = useGameStore((s) => s.models.bestDeployedModelScore / 100); const setProductPricing = useGameStore((s) => s.setProductPricing); const setOverloadPolicy = useGameStore((s) => s.setOverloadPolicy); const pricingFeedback = useAppliedFeedback(); diff --git a/apps/web/src/pages/ModelsPage.tsx b/apps/web/src/pages/ModelsPage.tsx index 73d50f6..6ec731e 100644 --- a/apps/web/src/pages/ModelsPage.tsx +++ b/apps/web/src/pages/ModelsPage.tsx @@ -1,48 +1,141 @@ import { useState } from 'react'; -import { Brain, Play, Rocket, Globe, SlidersHorizontal, ChevronDown, ChevronUp } from 'lucide-react'; +import { Play, Rocket, Globe, ChevronDown, ChevronUp, Beaker, Shield, Scissors, Wrench, Zap, BarChart3 } from 'lucide-react'; import { TutorialHint } from '@/components/game/TutorialHint'; import { useGameStore } from '@/store'; -import { formatNumber, formatPercent, formatDuration, VRAM_REQUIREMENTS_BY_GENERATION } from '@ai-tycoon/shared'; -import type { TuningPreset } from '@ai-tycoon/shared'; +import { + formatNumber, formatPercent, formatDuration, + VRAM_REQUIREMENTS_BY_GENERATION, DEFAULT_DATA_MIX, + ALIGNMENT_METHODS, + QUANTIZATION_CONFIGS, + PARAMETER_OPTIONS, +} from '@ai-tycoon/shared'; +import type { + ModelArchitecture, DataMixAllocation, SFTSpecialization, AlignmentMethod, + DataDomain, QuantizationLevel, BaseModel, ModelVariant, BenchmarkResult, +} from '@ai-tycoon/shared'; +import { BENCHMARKS } from '@ai-tycoon/game-engine'; + +const DATA_MIX_PRESETS: Record = { + balanced: { label: 'Balanced', mix: DEFAULT_DATA_MIX }, + 'code-focused': { label: 'Code-Focused', mix: { web: 0.15, books: 0.05, code: 0.40, scientific: 0.15, conversation: 0.08, multilingual: 0.02, images: 0.03, video: 0.02, audio: 0.02, synthetic: 0.08 } }, + creative: { label: 'Creative', mix: { web: 0.15, books: 0.30, code: 0.05, scientific: 0.05, conversation: 0.25, multilingual: 0.05, images: 0.05, video: 0.03, audio: 0.02, synthetic: 0.05 } }, + research: { label: 'Research', mix: { web: 0.15, books: 0.10, code: 0.15, scientific: 0.35, conversation: 0.05, multilingual: 0.03, images: 0.02, video: 0.02, audio: 0.02, synthetic: 0.11 } }, +}; + +const SFT_OPTIONS: { value: SFTSpecialization; label: string }[] = [ + { value: 'general', label: 'General' }, + { value: 'code', label: 'Code' }, + { value: 'math', label: 'Math' }, + { value: 'creative', label: 'Creative' }, + { value: 'multilingual', label: 'Multilingual' }, + { value: 'tool-use', label: 'Tool Use' }, +]; + +const DOMAIN_LABELS: Record = { + web: 'Web', books: 'Books', code: 'Code', scientific: 'Scientific', + conversation: 'Conversation', multilingual: 'Multilingual', + images: 'Images', video: 'Video', audio: 'Audio', synthetic: 'Synthetic', +}; + +const QUANT_LABELS: Record = { + fp16: 'FP16', int8: 'INT8', int4: 'INT4', int2: 'INT2', +}; export function ModelsPage() { - const trainedModels = useGameStore((s) => s.models.trainedModels); - const activeTraining = useGameStore((s) => s.models.activeTraining); + const baseModels = useGameStore((s) => s.models.baseModels); + const families = useGameStore((s) => s.models.families); + const pipelines = useGameStore((s) => s.models.activeTrainingPipelines); + const variantJobs = useGameStore((s) => s.models.variantJobs); + const evalJobs = useGameStore((s) => s.models.evalJobs); + const benchmarkResults = useGameStore((s) => s.models.benchmarkResults); const productLines = useGameStore((s) => s.models.productLines); const totalFlops = useGameStore((s) => s.compute.totalFlops); const totalVramGB = useGameStore((s) => s.compute.totalVramGB); const trainingAlloc = useGameStore((s) => s.compute.trainingAllocation); const totalData = useGameStore((s) => s.data.totalTrainingTokens); - const startTraining = useGameStore((s) => s.startTraining); + const currentEra = useGameStore((s) => s.meta.currentEra); + const startTrainingPipeline = useGameStore((s) => s.startTrainingPipeline); + const configureSFT = useGameStore((s) => s.configureSFT); + const configureAlignment = useGameStore((s) => s.configureAlignment); const deployModel = useGameStore((s) => s.deployModel); + const deployVariant = useGameStore((s) => s.deployVariant); + const createDistillation = useGameStore((s) => s.createDistillation); + const createFineTune = useGameStore((s) => s.createFineTune); + const createQuantization = useGameStore((s) => s.createQuantization); + const startEvaluation = useGameStore((s) => s.startEvaluation); const setTrainingAllocation = useGameStore((s) => s.setTrainingAllocation); const openSourceModel = useGameStore((s) => s.openSourceModel); - const setModelTuning = useGameStore((s) => s.setModelTuning); const openSourcedModels = useGameStore((s) => s.market.openSourcedModels); const completedResearch = useGameStore((s) => s.research.completedResearch); - const hasTuningSliders = completedResearch.includes('alignment-research'); const [modelName, setModelName] = useState(''); const [expandedModel, setExpandedModel] = useState(null); + const [expandedPipeline, setExpandedPipeline] = useState(null); + const [parameterCount, setParameterCount] = useState(7); + const [contextWindow, setContextWindow] = useState(8); + const [archType, setArchType] = useState<'dense' | 'moe'>('dense'); + const [dataMix, setDataMix] = useState({ ...DEFAULT_DATA_MIX }); + const [dataMixPreset, setDataMixPreset] = useState('balanced'); const trainingFlops = totalFlops * trainingAlloc; - const estimatedTicks = trainingFlops > 0 ? Math.max(30, Math.ceil(120 / (1 + trainingFlops * 0.1))) : Infinity; + const estimatedTicks = trainingFlops > 0 ? Math.max(30, Math.ceil(180 / (1 + trainingFlops * 0.1))) : Infinity; const estimatedCapability = Math.min(95, Math.sqrt(trainingFlops) * 5 + Math.log10(1 + totalData / 1e8) * 10); + const activePipelines = pipelines.filter(p => p.status === 'active' || p.status === 'stalled'); + + const eraOrder = ['startup', 'scaleup', 'bigtech', 'agi'] as const; + const currentEraIdx = eraOrder.indexOf(currentEra); + const availableBenchmarks = BENCHMARKS.filter(b => eraOrder.indexOf(b.unlockedAtEra) <= currentEraIdx); + const handleStartTraining = () => { - if (activeTraining || trainingFlops === 0) return; - const name = modelName.trim() || `Model v${trainedModels.length + 1}`; - startTraining({ + if (trainingFlops === 0) return; + const name = modelName.trim() || `Model v${families.length + 1}`; + + const architecture: ModelArchitecture = { + type: archType, + totalParameters: parameterCount, + activeParameters: archType === 'moe' ? Math.ceil(parameterCount * 0.25) : parameterCount, + contextWindow, + vocabularySize: 32000, + ...(archType === 'moe' ? { expertCount: 8, expertTopK: 2 } : {}), + }; + + startTrainingPipeline({ modelName: name, - generation: trainedModels.length + 1, - allocatedCompute: trainingFlops, - allocatedDataTokens: totalData, + architecture, + dataMix, + allocatedComputeFraction: 1.0, + targetTokens: totalData, totalTicks: estimatedTicks, - estimatedCapability, }); setModelName(''); }; + const handlePresetChange = (presetKey: string) => { + setDataMixPreset(presetKey); + const preset = DATA_MIX_PRESETS[presetKey]; + if (preset) setDataMix({ ...preset.mix }); + }; + + const handleMixSlider = (domain: DataDomain, value: number) => { + const newMix = { ...dataMix, [domain]: value / 100 }; + const total = Object.values(newMix).reduce((s, v) => s + v, 0); + if (total > 0) { + for (const key of Object.keys(newMix) as DataDomain[]) { + newMix[key] = newMix[key] / total; + } + } + setDataMix(newMix); + setDataMixPreset('custom'); + }; + + const hasAlignmentResearch = completedResearch.some(r => + r === 'alignment-research' || r === 'interpretability' || r === 'constitutional-ai', + ); + + const activeVariantJobs = variantJobs.filter(j => j.status === 'active'); + const activeEvalJobs = evalJobs.filter(j => j.status === 'active'); + return (

Models

@@ -51,14 +144,13 @@ export function ModelsPage() { Split compute between training (building new models) and inference (serving customers). Deploy trained models to start earning revenue. + {/* Compute Allocation */}

Compute Allocation

Training setTrainingAllocation(Number(e.target.value) / 100)} className="flex-1 accent-accent" @@ -71,186 +163,93 @@ export function ModelsPage() {
-
-

Train New Model

- {activeTraining ? ( -
-
- {activeTraining.modelName} - - {formatPercent(activeTraining.progressTicks / activeTraining.totalTicks)} complete - -
-
-
-
-
- ETA: {formatDuration(activeTraining.totalTicks - activeTraining.progressTicks)} -
- {(() => { - const reqVram = VRAM_REQUIREMENTS_BY_GENERATION[activeTraining.generation] ?? 0; - return reqVram > 0 && totalVramGB < reqVram ? ( -

- Training stalled — requires {formatNumber(reqVram)} GB VRAM (have {formatNumber(totalVramGB)} GB). Deploy more GPU racks. -

- ) : null; - })()} -
- ) : ( -
-
- - setModelName(e.target.value)} - placeholder={`Model v${trainedModels.length + 1}`} - className="w-full bg-surface-800 border border-surface-600 rounded px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-accent/50" - /> -
-
-
-
Training Compute
-
{formatNumber(trainingFlops)} FLOPS
-
-
-
Available VRAM
-
{formatNumber(totalVramGB)} GB
-
-
-
Training Data
-
{formatNumber(totalData)} tokens
-
-
-
Est. Time
-
{trainingFlops > 0 ? formatDuration(estimatedTicks) : 'N/A'}
-
-
-
- Estimated capability score: {estimatedCapability.toFixed(1)}/100 -
-
- - {trainingFlops === 0 && totalFlops === 0 && ( -

Build a data center and order racks first

- )} - {trainingFlops === 0 && totalFlops > 0 && ( -

Allocate compute to training above

- )} -
-
- )} -
- - {trainedModels.length > 0 && ( + {/* Active Training Pipelines */} + {activePipelines.length > 0 && (
-

Trained Models

- {trainedModels.map(model => { - const isExpanded = expandedModel === model.id; - const isOpenSourced = openSourcedModels.includes(model.id); +

Active Training

+ {activePipelines.map(pipeline => { + const stage = pipeline.currentStage === 'pretraining' ? pipeline.stages.pretraining + : pipeline.currentStage === 'sft' ? pipeline.stages.sft + : pipeline.stages.alignment; + if (!stage) return null; + const progress = stage.progressTicks / stage.totalTicks; + const generation = families.find(f => f.id === pipeline.familyId)?.generation ?? 1; + const reqVram = VRAM_REQUIREMENTS_BY_GENERATION[generation] ?? 0; + const isStalled = pipeline.status === 'stalled'; + const isExpanded = expandedPipeline === pipeline.id; + + const stageLabel = pipeline.currentStage === 'pretraining' ? 'Pre-training' + : pipeline.currentStage === 'sft' ? 'SFT' : 'Alignment'; + + const recentEvents = pipeline.events.slice(-3).reverse(); return ( -
-
+
+
-
-

{model.name}

-
- Gen {model.generation} · Benchmark: {model.benchmarkScore.toFixed(1)}/100 · Safety: {model.safetyScore.toFixed(0)}/100 - {isOpenSourced && Open Source} -
+ {pipeline.modelName} + + {pipeline.architecture.totalParameters}B {pipeline.architecture.type.toUpperCase()} · {pipeline.architecture.contextWindow}K ctx +
- {!isOpenSourced && model.isDeployed && ( - - )} - {model.isDeployed ? ( - Deployed - ) : ( - - )} + {stageLabel} + + {isStalled ? Stalled : `${formatPercent(progress)}`} +
- {isExpanded && ( -
-
- - Model Tuning -
-
- -
- {(['helpful-safe', 'max-capability', 'enterprise', 'creative'] as TuningPreset[]).map(preset => ( - - ))} -
-
+
+ + + +
- {hasTuningSliders && ( -
- setModelTuning(model.id, { safetyLevel: v })} /> - setModelTuning(model.id, { creativity: v })} /> - setModelTuning(model.id, { verbosity: v })} /> - setModelTuning(model.id, { speedQuality: v })} /> +
+
+
+
+ {isStalled + ? `Requires ${formatNumber(reqVram)} GB VRAM (have ${formatNumber(totalVramGB)} GB)` + : `ETA: ${formatDuration(stage.totalTicks - stage.progressTicks)}`} +
+ + {isExpanded && ( +
+ {pipeline.currentStage === 'pretraining' && ( +
+ Loss: {pipeline.stages.pretraining.lossValue.toFixed(3)} + {' · '}Chinchilla ratio: {pipeline.stages.pretraining.chinchillaRatio.toFixed(1)}
)} -
-
- Reasoning -
{model.capabilities.reasoning.toFixed(1)}
+ {!pipeline.stages.sft && pipeline.stages.pretraining.progressTicks < pipeline.stages.pretraining.totalTicks * 0.5 && ( + + )} + + {recentEvents.length > 0 && ( +
+ Recent Events + {recentEvents.map(event => ( +
+ {event.description} +
+ ))}
-
- Coding -
{model.capabilities.coding.toFixed(1)}
-
-
- Creative -
{model.capabilities.creative.toFixed(1)}
-
-
+ )}
)}
@@ -259,6 +258,283 @@ export function ModelsPage() {
)} + {/* Active Variant Jobs */} + {activeVariantJobs.length > 0 && ( +
+

Variant Jobs

+ {activeVariantJobs.map(job => { + const base = baseModels.find(m => m.id === job.baseModelId); + const progress = job.progressTicks / job.totalTicks; + return ( +
+
+ {('variantName' in job.config ? (job.config as { variantName: string }).variantName : base?.name) ?? 'Variant'} + {job.jobType} +
+
+
+
+
+ {formatPercent(progress)} · ETA: {formatDuration(job.totalTicks - job.progressTicks)} +
+
+ ); + })} +
+ )} + + {/* Active Eval Jobs */} + {activeEvalJobs.length > 0 && ( +
+

Running Evaluations

+ {activeEvalJobs.map(job => { + const model = baseModels.find(m => m.id === job.modelId) ?? families.flatMap(f => f.variants).find(v => v.id === job.modelId); + const progress = job.progressTicks / job.totalTicks; + return ( +
+
+ {model?.name ?? 'Unknown'} — {job.benchmarkIds.length} benchmarks + {formatPercent(progress)} +
+
+
+
+
+ ); + })} +
+ )} + + {/* Train New Model */} +
+

Train New Model

+
+
+
+ + setModelName(e.target.value)} + placeholder={`Model v${families.length + 1}`} + className="w-full bg-surface-800 border border-surface-600 rounded px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-accent/50" + /> +
+
+ +
+ + +
+
+
+
+
+ + +
+
+ + +
+
+ + {/* Data Mix */} +
+
+ +
+ {Object.entries(DATA_MIX_PRESETS).map(([key, preset]) => ( + + ))} +
+
+
+ {(Object.keys(DOMAIN_LABELS) as DataDomain[]).map(domain => ( +
+ {DOMAIN_LABELS[domain]} + handleMixSlider(domain, Number(e.target.value))} + className="flex-1 accent-accent h-1" + /> + {Math.round(dataMix[domain] * 100)}% +
+ ))} +
+
+ + {/* Stats */} +
+
+
Training Compute
+
{formatNumber(trainingFlops)} FLOPS
+
+
+
Available VRAM
+
{formatNumber(totalVramGB)} GB
+
+
+
Training Data
+
{formatNumber(totalData)} tokens
+
+
+
Est. Time
+
{trainingFlops > 0 ? formatDuration(estimatedTicks) : 'N/A'}
+
+
+
+ Estimated capability: {estimatedCapability.toFixed(1)}/100 + {archType === 'moe' && (+15% MoE bonus)} +
+
+ + {trainingFlops === 0 && totalFlops === 0 && ( +

Build a data center and order racks first

+ )} + {trainingFlops === 0 && totalFlops > 0 && ( +

Allocate compute to training above

+ )} +
+
+
+ + {/* Model Families & Trained Models */} + {families.length > 0 && ( +
+

Model Families

+ {families.map(family => { + const base = baseModels.find(m => m.familyId === family.id); + const variants = family.variants; + const isExpanded = expandedModel === family.id; + + if (!base) return null; + + return ( +
+
+
+ +
+

{family.name} Gen {family.generation}

+
+ {base.architecture.totalParameters}B {base.architecture.type.toUpperCase()} · Cap: {base.rawCapability.toFixed(1)} · Safety: {base.safetyProfile.overallSafety.toFixed(0)} + {variants.length > 0 && · {variants.length} variant{variants.length > 1 ? 's' : ''}} +
+
+
+
+ deployModel(base.id)} onOpenSource={() => openSourceModel(base.id)} /> +
+
+ + {isExpanded && ( +
+ {/* Base model details */} + + + {/* Variant creation */} + + + {/* Benchmark evaluation */} + + + {/* Variants tree */} + {variants.length > 0 && ( +
+ Variants + {variants.map(variant => ( + deployVariant(family.id, variant.id)} + onStartEval={startEvaluation} + /> + ))} +
+ )} +
+ )} +
+ ); + })} +
+ )} + + {/* Benchmark Leaderboard */} + {benchmarkResults.length > 0 && ( + + )} + + {/* Product Lines */}

Product Lines

{productLines.map(pl => ( @@ -267,7 +543,7 @@ export function ModelsPage() {

{pl.name}

- {pl.modelId ? `Running: ${trainedModels.find(m => m.id === pl.modelId)?.name ?? 'Unknown'}` : 'No model deployed'} + {pl.modelId ? `Running: ${baseModels.find(m => m.id === pl.modelId)?.name ?? 'Unknown'}` : 'No model deployed'}
@@ -281,21 +557,548 @@ export function ModelsPage() { ); } -function TuningSlider({ label, value, onChange }: { label: string; value: number; onChange: (v: number) => void }) { +function ModelActions({ model, isOpenSourced, onDeploy, onOpenSource }: { + model: BaseModel; + isOpenSourced: boolean; + onDeploy: () => void; + onOpenSource: () => void; +}) { return ( -
-
- {label} - {(value * 100).toFixed(0)}% + <> + {!isOpenSourced && model.isDeployed && ( + + )} + {model.isDeployed ? ( + Deployed + ) : ( + + )} + + ); +} + +function ModelDetails({ model, benchmarkResults }: { model: BaseModel; benchmarkResults: BenchmarkResult[] }) { + const modelResults = benchmarkResults.filter(r => r.modelId === model.id); + + return ( +
+
+
+ Architecture +
{model.architecture.totalParameters}B {model.architecture.type}
+
+
+ Context +
{model.architecture.contextWindow >= 1024 ? `${model.architecture.contextWindow / 1024}M` : `${model.architecture.contextWindow}K`}
+
+
+ Stages +
{model.trainingStagesCompleted.join(' + ')}
+
- onChange(Number(e.target.value) / 100)} - className="w-full accent-accent h-1.5" - /> +
+ {(['reasoning', 'coding', 'creative', 'math', 'knowledge', 'multimodal', 'agents', 'speed', 'contextUtilization'] as const).map(cap => ( +
+ {cap === 'contextUtilization' ? 'Context Util.' : cap} +
{model.capabilities[cap].toFixed(1)}
+
+ ))} +
+
+
+ Safety +
{model.safetyProfile.overallSafety.toFixed(1)}
+
+
+ Harm Avoidance +
{model.safetyProfile.harmAvoidance.toFixed(1)}
+
+
+ Refusal Rate +
{formatPercent(model.safetyProfile.refusalRate)}
+
+
+ + {modelResults.length > 0 && ( +
+ Benchmark Scores +
+ {modelResults.map(r => { + const bench = BENCHMARKS.find(b => b.id === r.benchmarkId); + return ( +
+ {bench?.name ?? r.benchmarkId} +
{r.score.toFixed(1)}
+
+ ); + })} +
+
+ )} +
+ ); +} + +function VariantCreator({ model, completedResearch, onDistill, onFineTune, onQuantize }: { + model: BaseModel; + completedResearch: string[]; + onDistill: (baseModelId: string, targetParams: number, name: string) => void; + onFineTune: (baseModelId: string, spec: SFTSpecialization, name: string) => void; + onQuantize: (baseModelId: string, level: QuantizationLevel, name: string) => void; +}) { + const [showCreator, setShowCreator] = useState(false); + const [creatorTab, setCreatorTab] = useState<'distill' | 'finetune' | 'quantize'>('quantize'); + const [distillParams, setDistillParams] = useState(7); + const [ftSpec, setFtSpec] = useState('code'); + const [quantLevel, setQuantLevel] = useState('int8'); + + const hasDistillation = completedResearch.includes('distillation'); + const hasQuantization = completedResearch.includes('quantization') || completedResearch.includes('model-compression'); + + const smallerParams = PARAMETER_OPTIONS.filter(p => p < model.architecture.totalParameters); + + if (!showCreator) { + return ( + + ); + } + + return ( +
+
+ Create Variant + +
+ +
+ {hasDistillation && ( + + )} + + {hasQuantization && ( + + )} +
+ + {creatorTab === 'distill' && hasDistillation && ( +
+
+ + +
+
+ Retention: ~{((0.70 + (distillParams / model.architecture.totalParameters) * 0.25) * 100).toFixed(0)}% quality +
+ +
+ )} + + {creatorTab === 'finetune' && ( +
+
+ +
+ {SFT_OPTIONS.map(opt => ( + + ))} +
+
+ +
+ )} + + {creatorTab === 'quantize' && hasQuantization && ( +
+
+ +
+ {(Object.keys(QUANTIZATION_CONFIGS) as QuantizationLevel[]).map(level => { + const cfg = QUANTIZATION_CONFIGS[level]; + return ( + + ); + })} +
+
+ +
+ )} +
+ ); +} + +function BenchmarkEvaluator({ modelId, modelName, availableBenchmarks, benchmarkResults, evalJobs, onStartEval }: { + modelId: string; + modelName: string; + availableBenchmarks: typeof BENCHMARKS; + benchmarkResults: BenchmarkResult[]; + evalJobs: { id: string; modelId: string; status: string }[]; + onStartEval: (modelId: string, benchmarkIds: string[]) => void; +}) { + const [showEval, setShowEval] = useState(false); + const [selectedBenchmarks, setSelectedBenchmarks] = useState([]); + + const existingResults = benchmarkResults.filter(r => r.modelId === modelId); + const evaluatedIds = new Set(existingResults.map(r => r.benchmarkId)); + const isEvaluating = evalJobs.some(j => j.modelId === modelId && j.status === 'active'); + const unevaluated = availableBenchmarks.filter(b => !evaluatedIds.has(b.id)); + + if (unevaluated.length === 0 && !showEval) { + return null; + } + + if (!showEval) { + return ( + + ); + } + + return ( +
+
+ Run Evaluation + +
+
+ {availableBenchmarks.map(bench => { + const alreadyDone = evaluatedIds.has(bench.id); + const selected = selectedBenchmarks.includes(bench.id); + return ( + + ); + })} +
+ {selectedBenchmarks.length > 0 && ( +
+ + {selectedBenchmarks.length} benchmark{selectedBenchmarks.length > 1 ? 's' : ''} · ~{availableBenchmarks.filter(b => selectedBenchmarks.includes(b.id)).reduce((s, b) => s + b.ticksToRun, 0)} ticks + + +
+ )} +
+ ); +} + +function VariantCard({ variant, familyId, benchmarkResults, availableBenchmarks, evalJobs, onDeploy, onStartEval }: { + variant: ModelVariant; + familyId: string; + benchmarkResults: BenchmarkResult[]; + availableBenchmarks: typeof BENCHMARKS; + evalJobs: { id: string; modelId: string; status: string }[]; + onDeploy: () => void; + onStartEval: (modelId: string, benchmarkIds: string[]) => void; +}) { + const [isExpanded, setIsExpanded] = useState(false); + const variantResults = benchmarkResults.filter(r => r.modelId === variant.id); + + const typeLabel = variant.variantType === 'distilled' ? 'Distilled' + : variant.variantType === 'fine-tuned' ? 'Fine-tuned' : 'Quantized'; + const typeColor = variant.variantType === 'distilled' ? 'text-purple-400' + : variant.variantType === 'fine-tuned' ? 'text-yellow-400' : 'text-green-400'; + + return ( +
+
+
+ +
+ {variant.name} + {typeLabel} + {variant.quantization && {variant.quantization.toUpperCase()}} + {variant.finetuneSpecialization && {variant.finetuneSpecialization}} +
+
+
+ + {variant.costMultiplier < 1 ? `${(variant.costMultiplier * 100).toFixed(0)}% cost` : ''} + {variant.speedMultiplier > 1 ? ` ${variant.speedMultiplier.toFixed(1)}x speed` : ''} + + {variant.isDeployed ? ( + Deployed + ) : ( + + )} +
+
+ + {isExpanded && ( +
+
+ {(['reasoning', 'coding', 'creative', 'math', 'knowledge', 'speed'] as const).map(cap => ( +
+ {cap} +
{variant.capabilities[cap].toFixed(1)}
+
+ ))} +
+ + {variantResults.length > 0 && ( +
+ {variantResults.map(r => { + const bench = BENCHMARKS.find(b => b.id === r.benchmarkId); + return ( +
+ {bench?.name ?? r.benchmarkId} +
{r.score.toFixed(1)}
+
+ ); + })} +
+ )} + + +
+ )} +
+ ); +} + +function BenchmarkLeaderboard({ benchmarkResults, baseModels, families, availableBenchmarks }: { + benchmarkResults: BenchmarkResult[]; + baseModels: BaseModel[]; + families: { id: string; name: string; variants: ModelVariant[] }[]; + availableBenchmarks: typeof BENCHMARKS; +}) { + const allModels: (BaseModel | ModelVariant)[] = [ + ...baseModels, + ...families.flatMap(f => f.variants), + ]; + + const modelNames = new Map(allModels.map(m => [m.id, m.name])); + const benchmarksWithResults = availableBenchmarks.filter(b => + benchmarkResults.some(r => r.benchmarkId === b.id), + ); + + if (benchmarksWithResults.length === 0) return null; + + const modelIds = [...new Set(benchmarkResults.map(r => r.modelId))]; + + return ( +
+

+ Benchmark Leaderboard +

+
+ + + + + {benchmarksWithResults.map(b => ( + + ))} + + + + + {modelIds.map(modelId => { + const results = benchmarkResults.filter(r => r.modelId === modelId); + const scores = benchmarksWithResults.map(b => { + const r = results.find(r => r.benchmarkId === b.id); + return r?.score ?? null; + }); + const validScores = scores.filter((s): s is number => s !== null); + const avg = validScores.length > 0 ? validScores.reduce((a, b) => a + b, 0) / validScores.length : 0; + + return ( + + + {scores.map((score, i) => ( + + ))} + + + ); + })} + +
Model{b.name}Avg
{modelNames.get(modelId) ?? 'Unknown'} + {score !== null ? ( + = 80 ? 'text-success' : score >= 50 ? 'text-accent-light' : 'text-surface-400'}> + {score.toFixed(1)} + + ) : ( + + )} + + {avg > 0 ? avg.toFixed(1) : '—'} +
+
+
+ ); +} + +function StageBar({ label, active, complete, progress, configured = true }: { + label: string; active: boolean; complete: boolean; progress: number; configured?: boolean; +}) { + return ( +
+
{label}
+
+ {active && !complete && ( +
+ )} +
+
+ ); +} + +function PostTrainingConfig({ pipelineId, hasAlignmentResearch, completedResearch, configureSFT, configureAlignment }: { + pipelineId: string; + hasAlignmentResearch: boolean; + completedResearch: string[]; + configureSFT: (pipelineId: string, specializations: SFTSpecialization[]) => void; + configureAlignment: (pipelineId: string, method: AlignmentMethod, safetyWeight: number) => void; +}) { + const [selectedSpecs, setSelectedSpecs] = useState(['general']); + const [alignMethod, setAlignMethod] = useState('rlhf'); + const [safetyWeight, setSafetyWeight] = useState(0.5); + + return ( +
+
Configure Post-Training (optional)
+ +
+
+ Supervised Fine-Tuning +
+
+ {SFT_OPTIONS.map(opt => ( + + ))} +
+ +
+ + {hasAlignmentResearch && ( +
+
+ Alignment +
+
+ {(Object.keys(ALIGNMENT_METHODS) as AlignmentMethod[]).map(method => { + const isAvailable = completedResearch.includes(ALIGNMENT_METHODS[method].requiredResearch); + return ( + + ); + })} +
+
+ Safety + setSafetyWeight(Number(e.target.value) / 100)} + className="flex-1 accent-accent h-1" /> + Helpful +
+ +
+ )}
); } diff --git a/apps/web/src/store/index.ts b/apps/web/src/store/index.ts index 1930b30..9c2758e 100644 --- a/apps/web/src/store/index.ts +++ b/apps/web/src/store/index.ts @@ -6,13 +6,17 @@ import type { ResearchState, ModelsState, MarketState, CompetitorState, TalentState, DataState, ReputationState, AchievementState, - Cluster, Campus, DataCenter, DCTier, RackSkuId, TrainingJob, + Cluster, Campus, DataCenter, DCTier, RackSkuId, ActiveResearch, OwnedDataset, LocationId, DeploymentCohort, PipelineStage, CampusRetrofitQueue, CoolingType, NetworkFabric, + FundingRoundType, OverloadPolicy, + TrainingPipeline, ModelFamily, DataMixAllocation, + ModelArchitecture, + SFTSpecialization, QuantizationLevel, VariantCreationJob, + EvalJob, } from '@ai-tycoon/shared'; -import type { FundingRoundType, OverloadPolicy, TuningPreset, ModelTuning } from '@ai-tycoon/shared'; import { INITIAL_SETTINGS, SAVE_VERSION, INITIAL_ECONOMY, INITIAL_INFRASTRUCTURE, INITIAL_COMPUTE, @@ -29,9 +33,15 @@ import { estimateNetworkSlots, maxComputeRacks, uuid, COOLING_TYPE_CONFIGS, COOLING_ORDER, NETWORK_FABRIC_CONFIGS, FABRIC_ORDER, + DEFAULT_DATA_MIX, + MAX_CONCURRENT_TRAINING, + DISTILLATION_TIME_FRACTION, DISTILLATION_COMPUTE_FRACTION, + FINETUNE_TIME_FRACTION, FINETUNE_COMPUTE_FRACTION, + QUANTIZATION_TICKS, } from '@ai-tycoon/shared'; import { emptyDCNetworkSummary, emptyCampusNetworkSummary, emptyClusterNetworkSummary, + BENCHMARKS, } from '@ai-tycoon/game-engine'; import { INITIAL_RIVALS } from '@ai-tycoon/game-engine'; @@ -97,8 +107,15 @@ interface Actions { upgradeDataCenter: (dataCenterId: string, upgrade: 'cooling' | 'redundancy') => void; upgradeCoolingType: (dataCenterId: string, targetCooling: CoolingType) => void; upgradeNetworkFabric: (dataCenterId: string, targetFabric: NetworkFabric) => void; - startTraining: (job: Omit) => void; + startTrainingPipeline: (config: { modelName: string; architecture: ModelArchitecture; dataMix: DataMixAllocation; allocatedComputeFraction: number; targetTokens: number; totalTicks: number }) => void; + configureSFT: (pipelineId: string, specializations: import('@ai-tycoon/shared').SFTSpecialization[]) => void; + configureAlignment: (pipelineId: string, method: import('@ai-tycoon/shared').AlignmentMethod, safetyWeight: number) => void; + createDistillation: (baseModelId: string, targetParameters: number, variantName: string) => void; + createFineTune: (baseModelId: string, specialization: SFTSpecialization, variantName: string) => void; + createQuantization: (baseModelId: string, level: QuantizationLevel, variantName: string) => void; + startEvaluation: (modelId: string, benchmarkIds: string[]) => void; deployModel: (modelId: string) => void; + deployVariant: (familyId: string, variantId: string) => void; setProductPricing: (productLineId: string, field: string, value: number) => void; toggleProductLine: (productLineId: string) => void; startResearch: (research: ActiveResearch) => void; @@ -107,7 +124,6 @@ interface Actions { raiseFunding: (roundType: FundingRoundType) => void; openSourceModel: (modelId: string) => void; setOverloadPolicy: (policy: Partial) => void; - setModelTuning: (modelId: string, tuning: Partial) => void; acquireCompetitor: (competitorId: string) => void; updateState: (partial: Partial) => void; } @@ -873,17 +889,175 @@ export const useGameStore = create()( // --- Non-infrastructure actions (unchanged) --- - startTraining: (job) => set((s) => ({ + startTrainingPipeline: (config) => set((s) => { + const activeCount = s.models.activeTrainingPipelines.filter(p => p.status === 'active' || p.status === 'stalled').length; + const maxSlots = MAX_CONCURRENT_TRAINING[s.meta.currentEra] ?? 1; + if (activeCount >= maxSlots) return s; + + const familyId = uuid(); + const pipelineId = uuid(); + const generation = s.models.families.length + 1; + + const family: ModelFamily = { + id: familyId, + name: config.modelName, + generation, + baseModelId: null, + variants: [], + createdAtTick: s.meta.tickCount, + }; + + const pipeline: TrainingPipeline = { + id: pipelineId, + familyId, + modelName: config.modelName, + architecture: config.architecture, + dataMix: config.dataMix, + currentStage: 'pretraining', + stages: { + pretraining: { + targetTokens: config.targetTokens, + processedTokens: 0, + computeAllocated: 0, + progressTicks: 0, + totalTicks: config.totalTicks, + lossValue: 10, + chinchillaRatio: config.targetTokens / (config.architecture.totalParameters * 1e9), + isComplete: false, + }, + sft: null, + alignment: null, + }, + status: 'active', + allocatedComputeFraction: config.allocatedComputeFraction, + events: [], + startedAtTick: s.meta.tickCount, + }; + + return { + models: { + ...s.models, + families: [...s.models.families, family], + activeTrainingPipelines: [...s.models.activeTrainingPipelines, pipeline], + }, + }; + }), + + configureSFT: (pipelineId, specializations) => set((s) => ({ models: { ...s.models, - activeTraining: { ...job, progressTicks: 0 }, + activeTrainingPipelines: s.models.activeTrainingPipelines.map(p => + p.id === pipelineId ? { + ...p, + stages: { + ...p.stages, + sft: { + specializations, + progressTicks: 0, + totalTicks: Math.ceil(p.stages.pretraining.totalTicks * 0.10), + isComplete: false, + }, + }, + } : p, + ), }, })), + configureAlignment: (pipelineId, method, safetyWeight) => set((s) => ({ + models: { + ...s.models, + activeTrainingPipelines: s.models.activeTrainingPipelines.map(p => + p.id === pipelineId ? { + ...p, + stages: { + ...p.stages, + alignment: { + method, + safetyWeight, + helpfulnessWeight: 1 - safetyWeight, + progressTicks: 0, + totalTicks: Math.ceil(p.stages.pretraining.totalTicks * 0.08), + isComplete: false, + }, + }, + } : p, + ), + }, + })), + + createDistillation: (baseModelId, targetParameters, variantName) => set((s) => { + const base = s.models.baseModels.find(m => m.id === baseModelId); + if (!base) return s; + const job: VariantCreationJob = { + id: uuid(), + familyId: base.familyId, + baseModelId, + jobType: 'distillation', + config: { targetParameters, targetArchitecture: base.architecture.type, variantName }, + progressTicks: 0, + totalTicks: Math.ceil(base.trainingCostTotal > 0 ? DISTILLATION_TIME_FRACTION * 120 : 30), + allocatedComputeFraction: DISTILLATION_COMPUTE_FRACTION, + status: 'active', + }; + return { models: { ...s.models, variantJobs: [...s.models.variantJobs, job] } }; + }), + + createFineTune: (baseModelId, specialization, variantName) => set((s) => { + const base = s.models.baseModels.find(m => m.id === baseModelId); + if (!base) return s; + const job: VariantCreationJob = { + id: uuid(), + familyId: base.familyId, + baseModelId, + jobType: 'fine-tuning', + config: { specialization, datasetIds: [], variantName }, + progressTicks: 0, + totalTicks: Math.ceil(FINETUNE_TIME_FRACTION * 120), + allocatedComputeFraction: FINETUNE_COMPUTE_FRACTION, + status: 'active', + }; + return { models: { ...s.models, variantJobs: [...s.models.variantJobs, job] } }; + }), + + createQuantization: (baseModelId, level, variantName) => set((s) => { + const base = s.models.baseModels.find(m => m.id === baseModelId); + if (!base) return s; + const job: VariantCreationJob = { + id: uuid(), + familyId: base.familyId, + baseModelId, + jobType: 'quantization', + config: { level, variantName }, + progressTicks: 0, + totalTicks: QUANTIZATION_TICKS, + allocatedComputeFraction: 0, + status: 'active', + }; + return { models: { ...s.models, variantJobs: [...s.models.variantJobs, job] } }; + }), + + startEvaluation: (modelId, benchmarkIds) => set((s) => { + const benchmarks = BENCHMARKS.filter(b => benchmarkIds.includes(b.id)); + if (benchmarks.length === 0) return s; + const totalTicks = benchmarks.reduce((sum, b) => sum + b.ticksToRun, 0); + const computeCost = benchmarks.reduce((sum, b) => sum + b.computeCost, 0); + const job: EvalJob = { + id: uuid(), + modelId, + benchmarkIds, + progressTicks: 0, + totalTicks, + computeAllocated: computeCost, + status: 'active', + results: [], + }; + return { models: { ...s.models, evalJobs: [...s.models.evalJobs, job] } }; + }), + deployModel: (modelId) => set((s) => ({ models: { ...s.models, - trainedModels: s.models.trainedModels.map(m => + baseModels: s.models.baseModels.map(m => m.id === modelId ? { ...m, isDeployed: true } : m, ), productLines: s.models.productLines.map(pl => ({ @@ -892,6 +1066,17 @@ export const useGameStore = create()( }, })), + deployVariant: (familyId, variantId) => set((s) => ({ + models: { + ...s.models, + families: s.models.families.map(f => + f.id === familyId + ? { ...f, variants: f.variants.map(v => v.id === variantId ? { ...v, isDeployed: true } : v) } + : f, + ), + }, + })), + setProductPricing: (productLineId, field, value) => set((s) => ({ models: { ...s.models, @@ -996,15 +1181,6 @@ export const useGameStore = create()( }, })), - setModelTuning: (modelId, tuning) => set((s) => ({ - models: { - ...s.models, - trainedModels: s.models.trainedModels.map(m => - m.id === modelId ? { ...m, tuning: { ...m.tuning, ...tuning } } : m, - ), - }, - })), - acquireCompetitor: (competitorId) => set((s) => { const rival = s.competitors.rivals.find(r => r.id === competitorId); if (!rival || rival.status === 'acquired') return s; @@ -1058,7 +1234,7 @@ export const useGameStore = create()( notifications: [{ id: uuid(), title: 'Save Reset', - message: 'Your save was reset due to a major rack system overhaul — 20 SKUs with training/inference specialization, VRAM, cooling tech, interconnects, and AMD/ASIC vendors!', + message: 'Your save was reset due to a major model system overhaul — multi-stage training pipelines, model families with variants, benchmarks, and architecture choices!', type: 'info' as const, tick: 0, read: false, diff --git a/packages/game-engine/src/data/achievements.ts b/packages/game-engine/src/data/achievements.ts index d5e1484..e3beab9 100644 --- a/packages/game-engine/src/data/achievements.ts +++ b/packages/game-engine/src/data/achievements.ts @@ -13,7 +13,7 @@ export const ACHIEVEMENT_DEFINITIONS: AchievementDefinition[] = [ name: 'Hello World', description: 'Train your first AI model.', icon: 'Brain', - condition: { field: 'models.trainedModels.length', operator: 'gte', value: 1 }, + condition: { field: 'models.baseModels.length', operator: 'gte', value: 1 }, }, { id: 'first-deploy', diff --git a/packages/game-engine/src/data/benchmarks.ts b/packages/game-engine/src/data/benchmarks.ts new file mode 100644 index 0000000..0ae5adf --- /dev/null +++ b/packages/game-engine/src/data/benchmarks.ts @@ -0,0 +1,111 @@ +import type { BenchmarkDefinition } from '@ai-tycoon/shared'; + +export const BENCHMARKS: BenchmarkDefinition[] = [ + { + id: 'arc-challenge', + name: 'ARC Challenge', + category: 'reasoning', + description: 'Advanced reasoning and comprehension tasks requiring multi-step inference.', + primaryCapability: 'reasoning', + secondaryCapability: 'knowledge', + computeCost: 0.001, + ticksToRun: 8, + unlockedAtEra: 'startup', + marketRelevance: { consumer: 0.3, enterprise: 0.5, developer: 0.4, research: 0.8 }, + }, + { + id: 'codeforce', + name: 'CodeForce', + category: 'coding', + description: 'Competitive programming and software engineering benchmarks.', + primaryCapability: 'coding', + secondaryCapability: 'reasoning', + computeCost: 0.001, + ticksToRun: 8, + unlockedAtEra: 'startup', + marketRelevance: { consumer: 0.2, enterprise: 0.7, developer: 0.9, research: 0.5 }, + }, + { + id: 'mathquest', + name: 'MathQuest', + category: 'math', + description: 'Mathematical problem-solving from algebra to graduate-level proofs.', + primaryCapability: 'math', + secondaryCapability: 'reasoning', + computeCost: 0.001, + ticksToRun: 8, + unlockedAtEra: 'startup', + marketRelevance: { consumer: 0.1, enterprise: 0.6, developer: 0.5, research: 0.9 }, + }, + { + id: 'worldfacts', + name: 'WorldFacts', + category: 'knowledge', + description: 'Broad factual knowledge across science, history, culture, and current events.', + primaryCapability: 'knowledge', + secondaryCapability: 'reasoning', + computeCost: 0.001, + ticksToRun: 6, + unlockedAtEra: 'startup', + marketRelevance: { consumer: 0.5, enterprise: 0.4, developer: 0.3, research: 0.6 }, + }, + { + id: 'chatrank', + name: 'ChatRank', + category: 'chat', + description: 'Human preference evaluation of conversational quality, helpfulness, and creativity.', + primaryCapability: 'creative', + secondaryCapability: 'knowledge', + computeCost: 0.002, + ticksToRun: 10, + unlockedAtEra: 'startup', + marketRelevance: { consumer: 0.9, enterprise: 0.3, developer: 0.2, research: 0.2 }, + }, + { + id: 'harmguard', + name: 'HarmGuard', + category: 'safety', + description: 'Safety evaluation measuring harm avoidance, truthfulness, and responsible behavior.', + primaryCapability: 'reasoning', + computeCost: 0.001, + ticksToRun: 8, + unlockedAtEra: 'startup', + marketRelevance: { consumer: 0.4, enterprise: 0.9, developer: 0.3, research: 0.7 }, + }, + { + id: 'visionbench', + name: 'VisionBench', + category: 'multimodal', + description: 'Image understanding, visual reasoning, and multimodal comprehension.', + primaryCapability: 'multimodal', + secondaryCapability: 'reasoning', + computeCost: 0.003, + ticksToRun: 12, + unlockedAtEra: 'scaleup', + marketRelevance: { consumer: 0.5, enterprise: 0.6, developer: 0.6, research: 0.7 }, + }, + { + id: 'agentarena', + name: 'AgentArena', + category: 'agents', + description: 'Autonomous agent tasks: tool use, multi-step planning, and environment interaction.', + primaryCapability: 'agents', + secondaryCapability: 'coding', + computeCost: 0.005, + ticksToRun: 15, + unlockedAtEra: 'bigtech', + marketRelevance: { consumer: 0.3, enterprise: 0.8, developer: 0.7, research: 0.6 }, + }, + { + id: 'frontier-eval', + name: 'Frontier Eval', + category: 'reasoning', + description: 'Cutting-edge capability evaluation at the frontier of AI research.', + primaryCapability: 'reasoning', + secondaryCapability: 'math', + computeCost: 0.01, + ticksToRun: 20, + unlockedAtEra: 'agi', + marketRelevance: { consumer: 0.2, enterprise: 0.5, developer: 0.5, research: 1.0 }, + }, +]; diff --git a/packages/game-engine/src/index.ts b/packages/game-engine/src/index.ts index 40513c3..67caf4e 100644 --- a/packages/game-engine/src/index.ts +++ b/packages/game-engine/src/index.ts @@ -8,3 +8,4 @@ export { TECH_TREE } from './data/techTree'; export { INITIAL_RIVALS } from './data/competitors'; export { KEY_HIRE_POOL } from './data/keyHires'; export { ACHIEVEMENT_DEFINITIONS } from './data/achievements'; +export { BENCHMARKS } from './data/benchmarks'; diff --git a/packages/game-engine/src/systems/achievementSystem.ts b/packages/game-engine/src/systems/achievementSystem.ts index 5ccfad4..77cd906 100644 --- a/packages/game-engine/src/systems/achievementSystem.ts +++ b/packages/game-engine/src/systems/achievementSystem.ts @@ -9,7 +9,7 @@ const ERA_INDEX: Record = { startup: 0, scaleup: 1, bigtech: 2, function getFieldValue(state: GameState, field: string): number { if (field === 'meta._eraIndex') return ERA_INDEX[state.meta.currentEra] ?? 0; - if (field === 'meta._deployedModelCount') return state.models.trainedModels.filter(m => m.isDeployed).length; + if (field === 'meta._deployedModelCount') return state.models.baseModels.filter(m => m.isDeployed).length; const parts = field.split('.'); let current: unknown = state; for (const part of parts) { diff --git a/packages/game-engine/src/systems/competitorSystem.ts b/packages/game-engine/src/systems/competitorSystem.ts index 6c7d60c..ee5f7fe 100644 --- a/packages/game-engine/src/systems/competitorSystem.ts +++ b/packages/game-engine/src/systems/competitorSystem.ts @@ -43,7 +43,7 @@ export function processCompetitors(state: GameState): CompetitorState { const allCaps = [ ...rivals.filter(r => r.status === 'active').map(r => r.estimatedCapability), - state.models.trainedModels.reduce((best, m) => Math.max(best, m.benchmarkScore), 0), + state.models.bestDeployedModelScore, ]; const industryBenchmark = allCaps.length > 0 ? Math.max(...allCaps) : 0; diff --git a/packages/game-engine/src/systems/economySystem.ts b/packages/game-engine/src/systems/economySystem.ts index 8a8969a..21387ce 100644 --- a/packages/game-engine/src/systems/economySystem.ts +++ b/packages/game-engine/src/systems/economySystem.ts @@ -22,7 +22,7 @@ export function processEconomy( const talentExpenses = state.talent.totalSalaryPerTick; const dataExpenses = state.data.partnerships.reduce((sum, p) => sum + p.costPerTick, 0); - const bestCapability = state.models.trainedModels.reduce((best, m) => Math.max(best, m.benchmarkScore), 0); + const bestCapability = state.models.bestDeployedModelScore; const eraIdx = ['startup', 'scaleup', 'bigtech', 'agi'].indexOf(state.meta.currentEra); const complianceCost = bestCapability > 30 ? bestCapability * REGULATION_COMPLIANCE_PER_CAPABILITY * (1 + eraIdx * 0.5) / 100 : 0; diff --git a/packages/game-engine/src/systems/eraSystem.ts b/packages/game-engine/src/systems/eraSystem.ts index f8a7fe6..b210ac4 100644 --- a/packages/game-engine/src/systems/eraSystem.ts +++ b/packages/game-engine/src/systems/eraSystem.ts @@ -11,9 +11,7 @@ export function checkEraTransition(state: GameState): Era | null { const thresholds = ERA_THRESHOLDS[nextEra as keyof typeof ERA_THRESHOLDS]; if (!thresholds) return null; - const bestModel = state.models.trainedModels.reduce( - (best, m) => Math.max(best, m.benchmarkScore), 0, - ); + const bestModel = state.models.bestDeployedModelScore; if ( state.economy.totalRevenue >= thresholds.revenue && diff --git a/packages/game-engine/src/systems/fundingSystem.ts b/packages/game-engine/src/systems/fundingSystem.ts index d34fbf9..943ea05 100644 --- a/packages/game-engine/src/systems/fundingSystem.ts +++ b/packages/game-engine/src/systems/fundingSystem.ts @@ -35,9 +35,6 @@ export function canRaiseFunding(state: GameState): { canRaise: boolean; nextRoun export function computeValuation(state: GameState): number { const revenueMultiple = state.economy.revenuePerTick * 86400 * 365; const subscriberValue = state.market.consumers.totalSubscribers * 500; - const capabilityValue = Math.pow( - Math.max(...state.models.trainedModels.map(m => m.benchmarkScore), 0), - 2, - ) * 1000; + const capabilityValue = Math.pow(state.models.bestDeployedModelScore, 2) * 1000; return Math.max(100_000, revenueMultiple * 10 + subscriberValue + capabilityValue); } diff --git a/packages/game-engine/src/systems/marketSystem.ts b/packages/game-engine/src/systems/marketSystem.ts index c337bb7..3fc9ccf 100644 --- a/packages/game-engine/src/systems/marketSystem.ts +++ b/packages/game-engine/src/systems/marketSystem.ts @@ -1,4 +1,4 @@ -import type { GameState, MarketState } from '@ai-tycoon/shared'; +import type { GameState, MarketState, BenchmarkResult } from '@ai-tycoon/shared'; import { CONSUMER_BASE_GROWTH, CONSUMER_QUALITY_GROWTH_MULTIPLIER, @@ -13,6 +13,7 @@ import { MARKET_CAP_REPUTATION_BONUS, OVERLOAD_PENALTY_EXPONENT, } from '@ai-tycoon/shared'; +import { BENCHMARKS } from '../data/benchmarks'; export interface MarketTickResult { marketState: MarketState; @@ -21,12 +22,39 @@ export interface MarketTickResult { totalTokenDemand: number; } -export function processMarket(state: GameState, currentTickCapacity: number): MarketTickResult { - const bestModel = state.models.trainedModels - .filter(m => m.isDeployed) - .sort((a, b) => b.benchmarkScore - a.benchmarkScore)[0]; +function getSegmentQuality( + segment: 'consumer' | 'enterprise' | 'developer' | 'research', + benchmarkResults: BenchmarkResult[], + fallbackScore: number, +): number { + if (benchmarkResults.length === 0) return fallbackScore / 100; - const modelQuality = bestModel ? bestModel.benchmarkScore / 100 : 0; + const bestByBenchmark = new Map(); + for (const r of benchmarkResults) { + const prev = bestByBenchmark.get(r.benchmarkId) ?? 0; + if (r.score > prev) bestByBenchmark.set(r.benchmarkId, r.score); + } + + let weightedSum = 0; + let totalWeight = 0; + for (const bench of BENCHMARKS) { + const score = bestByBenchmark.get(bench.id); + if (score == null) continue; + const weight = bench.marketRelevance[segment]; + weightedSum += (score / 100) * weight; + totalWeight += weight; + } + + if (totalWeight === 0) return fallbackScore / 100; + return weightedSum / totalWeight; +} + +export function processMarket(state: GameState, currentTickCapacity: number): MarketTickResult { + const consumerQuality = getSegmentQuality('consumer', state.models.benchmarkResults, state.models.bestDeployedModelScore); + const enterpriseQuality = getSegmentQuality('enterprise', state.models.benchmarkResults, state.models.bestDeployedModelScore); + const modelQuality = state.models.benchmarkResults.length > 0 + ? (consumerQuality + enterpriseQuality) / 2 + : state.models.bestDeployedModelScore / 100; const chatProduct = state.models.productLines.find(p => p.type === 'chat-product'); const textApi = state.models.productLines.find(p => p.type === 'text-api'); @@ -34,7 +62,7 @@ export function processMarket(state: GameState, currentTickCapacity: number): Ma const consumers = { ...state.market.consumers }; let subscriptionRevenue = 0; - if (chatProduct?.isActive && bestModel) { + if (chatProduct?.isActive && modelQuality > 0) { const price = chatProduct.pricing.subscriptionPrice; const fairPrice = 20 + modelQuality * 80; const priceRatio = price / Math.max(1, fairPrice); @@ -109,7 +137,7 @@ export function processMarket(state: GameState, currentTickCapacity: number): Ma let apiRevenue = 0; let organicApiTokens = 0; - if (textApi?.isActive && bestModel) { + if (textApi?.isActive && modelQuality > 0) { const reputationFactor = state.reputation.score / 100; const qualityFactor = modelQuality; const priceFactor = Math.max(0.1, 1 - (textApi.pricing.outputTokenPrice / 20)); diff --git a/packages/game-engine/src/systems/modelSystem.ts b/packages/game-engine/src/systems/modelSystem.ts index 4352487..22e7184 100644 --- a/packages/game-engine/src/systems/modelSystem.ts +++ b/packages/game-engine/src/systems/modelSystem.ts @@ -1,21 +1,40 @@ -import type { GameState, ModelsState, TrainedModel, ModelCapabilities } from '@ai-tycoon/shared'; -import { uuid, VRAM_REQUIREMENTS_BY_GENERATION } from '@ai-tycoon/shared'; +import type { + GameState, ModelsState, BaseModel, ModelCapabilities, SafetyProfile, + TrainingPipeline, TrainingEvent, TrainingEventType, + ModelVariant, VariantCreationJob, EvalJob, BenchmarkResult, + BenchmarkDefinition, +} from '@ai-tycoon/shared'; +import { BENCHMARKS } from '../data/benchmarks'; +import { + uuid, VRAM_REQUIREMENTS_BY_GENERATION, + SFT_TIME_FRACTION, SFT_COMPUTE_FRACTION, + ALIGNMENT_TIME_FRACTION, ALIGNMENT_COMPUTE_FRACTION, + MOE_CAPABILITY_MULTIPLIER, MOE_SPEED_MULTIPLIER, + EVENT_BASE_PROBABILITY, + LOSS_SPIKE_DELAY_MIN, LOSS_SPIKE_DELAY_MAX, + INSTABILITY_PROGRESS_LOSS_MIN, INSTABILITY_PROGRESS_LOSS_MAX, + BREAKTHROUGH_CAPABILITY_BONUS_MIN, BREAKTHROUGH_CAPABILITY_BONUS_MAX, + EMERGENT_CAPABILITY_THRESHOLDS, + ALIGNMENT_METHODS, + SFT_SPECIALIZATION_BONUSES, + QUANTIZATION_CONFIGS, + DISTILLATION_BASE_RETENTION, + QUANTIZATION_TICKS, +} from '@ai-tycoon/shared'; export interface ModelTickResult { modelsState: ModelsState; - modelCompleted: TrainedModel | null; + completedModels: BaseModel[]; + notifications: { title: string; message: string; type: 'success' | 'warning' | 'info' }[]; } export function processModels(state: GameState): ModelTickResult { - const active = state.models.activeTraining; - if (!active) { - return { modelsState: state.models, modelCompleted: null }; - } + const completedModels: BaseModel[] = []; + const notifications: ModelTickResult['notifications'] = []; + let baseModels = [...state.models.baseModels]; + let families = [...state.models.families]; - const requiredVram = VRAM_REQUIREMENTS_BY_GENERATION[active.generation] ?? 0; - if (requiredVram > 0 && state.compute.totalVramGB < requiredVram) { - return { modelsState: state.models, modelCompleted: null }; - } + const totalTrainingFlops = state.compute.totalTrainingFlops * state.compute.trainingAllocation; const researcherBoost = state.talent.departments.research.headcount * state.talent.departments.research.effectiveness; @@ -23,82 +42,487 @@ export function processModels(state: GameState): ModelTickResult { state.talent.departments.engineering.effectiveness; const speedMultiplier = 1 + (researcherBoost + engineerBoost) * 0.05; - const newProgress = active.progressTicks + speedMultiplier; + const updatedPipelines: TrainingPipeline[] = []; - if (newProgress >= active.totalTicks) { - const model = createTrainedModel(active.modelName, active.generation, active.allocatedCompute, active.allocatedDataTokens, state); + for (const pipeline of state.models.activeTrainingPipelines) { + if (pipeline.status !== 'active') { + updatedPipelines.push(pipeline); + continue; + } - return { - modelsState: { - ...state.models, - trainedModels: [...state.models.trainedModels, model], - activeTraining: null, - }, - modelCompleted: model, - }; + const generation = families.find(f => f.id === pipeline.familyId)?.generation ?? 1; + const requiredVram = VRAM_REQUIREMENTS_BY_GENERATION[generation] ?? 0; + if (requiredVram > 0 && state.compute.totalVramGB < requiredVram) { + updatedPipelines.push({ ...pipeline, status: 'stalled' }); + continue; + } + + const effectiveFlops = totalTrainingFlops * pipeline.allocatedComputeFraction; + let updated = { ...pipeline, events: [...pipeline.events] }; + + if (pipeline.currentStage === 'pretraining') { + const stage = { ...pipeline.stages.pretraining }; + const newProgress = stage.progressTicks + speedMultiplier; + + const events = generateTrainingEvents(pipeline, state); + let tickDelay = 0; + let progressLost = 0; + for (const event of events) { + updated.events.push(event); + if (event.type === 'loss_spike') { + tickDelay += event.impact.ticksDelayed ?? 0; + notifications.push({ title: 'Loss Spike', message: `${pipeline.modelName}: Training loss spiked! Delayed ${event.impact.ticksDelayed} ticks.`, type: 'warning' }); + } else if (event.type === 'instability') { + progressLost += event.impact.progressLost ?? 0; + notifications.push({ title: 'Training Instability', message: `${pipeline.modelName}: Rolled back to checkpoint. Lost ${Math.round((event.impact.progressLost ?? 0) * 100)}% progress.`, type: 'warning' }); + } else if (event.type === 'breakthrough') { + notifications.push({ title: 'Breakthrough!', message: `${pipeline.modelName}: Unexpected capability jump in ${event.impact.capabilityDomain}!`, type: 'success' }); + } else if (event.type === 'hardware_failure') { + tickDelay += event.impact.ticksDelayed ?? 0; + notifications.push({ title: 'Hardware Failure', message: `${pipeline.modelName}: GPU failure during training. Recovering from checkpoint.`, type: 'warning' }); + } else if (event.type === 'data_contamination') { + notifications.push({ title: 'Data Contamination', message: `${pipeline.modelName}: Copyright concerns detected in training data.`, type: 'warning' }); + } + } + + const effectiveProgress = Math.max(0, newProgress - tickDelay - (stage.totalTicks * progressLost)); + stage.progressTicks = effectiveProgress; + stage.computeAllocated = effectiveFlops; + stage.lossValue = Math.max(0.01, 10 * Math.exp(-stage.progressTicks / stage.totalTicks * 3)); + + if (stage.progressTicks >= stage.totalTicks) { + stage.isComplete = true; + stage.progressTicks = stage.totalTicks; + + if (updated.stages.sft) { + updated.currentStage = 'sft'; + notifications.push({ title: 'Pre-training Complete', message: `${pipeline.modelName}: Moving to supervised fine-tuning.`, type: 'info' }); + } else if (updated.stages.alignment) { + updated.currentStage = 'alignment'; + notifications.push({ title: 'Pre-training Complete', message: `${pipeline.modelName}: Moving to alignment.`, type: 'info' }); + } else { + const model = createBaseModel(updated, state); + baseModels = [...baseModels, model]; + families = families.map(f => + f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f, + ); + completedModels.push(model); + updated.status = 'completed'; + } + } + updated = { ...updated, stages: { ...updated.stages, pretraining: stage } }; + } else if (pipeline.currentStage === 'sft' && pipeline.stages.sft) { + const stage = { ...pipeline.stages.sft }; + stage.progressTicks += speedMultiplier; + + if (stage.progressTicks >= stage.totalTicks) { + stage.isComplete = true; + stage.progressTicks = stage.totalTicks; + + if (updated.stages.alignment) { + updated.currentStage = 'alignment'; + notifications.push({ title: 'SFT Complete', message: `${pipeline.modelName}: Moving to alignment.`, type: 'info' }); + } else { + const model = createBaseModel(updated, state); + baseModels = [...baseModels, model]; + families = families.map(f => + f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f, + ); + completedModels.push(model); + updated.status = 'completed'; + } + } + updated = { ...updated, stages: { ...updated.stages, sft: stage } }; + } else if (pipeline.currentStage === 'alignment' && pipeline.stages.alignment) { + const stage = { ...pipeline.stages.alignment }; + stage.progressTicks += speedMultiplier; + + if (stage.progressTicks >= stage.totalTicks) { + stage.isComplete = true; + stage.progressTicks = stage.totalTicks; + + const model = createBaseModel(updated, state); + baseModels = [...baseModels, model]; + families = families.map(f => + f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f, + ); + completedModels.push(model); + updated.status = 'completed'; + } + updated = { ...updated, stages: { ...updated.stages, alignment: stage } }; + } + + updatedPipelines.push(updated); } + const updatedVariantJobs = processVariantJobs(state, speedMultiplier); + for (const variant of updatedVariantJobs.newVariants) { + variant.createdAtTick = state.meta.tickCount; + families = families.map(f => + f.id === variant.familyId ? { ...f, variants: [...f.variants, variant] } : f, + ); + notifications.push({ + title: 'Variant Created', + message: `${variant.name} (${variant.variantType}) is ready!`, + type: 'success', + }); + } + + const updatedEvalJobs = processEvalJobs(state); + + const allDeployed = [ + ...baseModels.filter(m => m.isDeployed), + ...families.flatMap(f => f.variants.filter(v => v.isDeployed)), + ]; + + const bestDeployedModelScore = allDeployed.reduce((best, m) => + Math.max(best, 'rawCapability' in m ? m.rawCapability : computeVariantScore(m)), 0); + + const bestDeployedSafetyScore = allDeployed.reduce((best, m) => + Math.max(best, m.safetyProfile.overallSafety), 0); + return { modelsState: { ...state.models, - activeTraining: { ...active, progressTicks: newProgress }, + baseModels, + families, + activeTrainingPipelines: updatedPipelines, + variantJobs: updatedVariantJobs.jobs, + evalJobs: updatedEvalJobs.jobs, + benchmarkResults: [...state.models.benchmarkResults, ...updatedEvalJobs.newResults], + bestDeployedModelScore, + bestDeployedSafetyScore, }, - modelCompleted: null, + completedModels, + notifications, }; } -function createTrainedModel( - name: string, - generation: number, - compute: number, - dataTokens: number, +function generateTrainingEvents(pipeline: TrainingPipeline, state: GameState): TrainingEvent[] { + const events: TrainingEvent[] = []; + const params = pipeline.architecture.totalParameters; + const baseProbability = EVENT_BASE_PROBABILITY * Math.log10(Math.max(1, params)); + + const hasInterpretability = state.research.completedResearch.includes('interpretability'); + const hasDataPipeline = state.research.completedResearch.includes('data-pipeline'); + const hasRedundancy = state.research.completedResearch.includes('redundancy-protocols'); + + if (Math.random() < baseProbability * 2.0) { + const delay = LOSS_SPIKE_DELAY_MIN + Math.floor(Math.random() * (LOSS_SPIKE_DELAY_MAX - LOSS_SPIKE_DELAY_MIN)); + events.push({ + id: uuid(), type: 'loss_spike', tick: state.meta.tickCount, + severity: delay > 15 ? 'major' : delay > 10 ? 'moderate' : 'minor', + description: `Training loss spiked to ${(Math.random() * 5 + 2).toFixed(2)}`, + resolved: true, + impact: { ticksDelayed: delay }, + }); + } + + if (params > 10 && Math.random() < baseProbability * (hasInterpretability ? 0.25 : 0.5)) { + const loss = INSTABILITY_PROGRESS_LOSS_MIN + Math.random() * (INSTABILITY_PROGRESS_LOSS_MAX - INSTABILITY_PROGRESS_LOSS_MIN); + events.push({ + id: uuid(), type: 'instability', tick: state.meta.tickCount, + severity: loss > 0.12 ? 'major' : 'moderate', + description: 'Training run became unstable. Rolling back to last checkpoint.', + resolved: true, + impact: { progressLost: loss }, + }); + } + + const chinchillaRatio = pipeline.stages.pretraining.chinchillaRatio; + if (params > 30 && chinchillaRatio > 15 && Math.random() < baseProbability * 0.3) { + const capDomains: (keyof ModelCapabilities)[] = ['reasoning', 'coding', 'creative', 'math', 'knowledge', 'agents']; + const domain = capDomains[Math.floor(Math.random() * capDomains.length)]; + const bonus = BREAKTHROUGH_CAPABILITY_BONUS_MIN + Math.floor(Math.random() * (BREAKTHROUGH_CAPABILITY_BONUS_MAX - BREAKTHROUGH_CAPABILITY_BONUS_MIN)); + events.push({ + id: uuid(), type: 'breakthrough', tick: state.meta.tickCount, + severity: 'major', + description: `Unexpected capability jump in ${domain}!`, + resolved: true, + impact: { capabilityBonus: bonus, capabilityDomain: domain }, + }); + } + + for (const [thresholdStr, capName] of Object.entries(EMERGENT_CAPABILITY_THRESHOLDS)) { + const threshold = Number(thresholdStr); + const prevProgress = pipeline.stages.pretraining.progressTicks; + const progressRatio = prevProgress / pipeline.stages.pretraining.totalTicks; + if (params >= threshold && progressRatio > 0.5 && progressRatio < 0.55) { + events.push({ + id: uuid(), type: 'emergent_capability', tick: state.meta.tickCount, + severity: 'major', + description: `Model developed ${capName} capability!`, + resolved: true, + impact: { capabilityBonus: 10, capabilityDomain: 'reasoning' }, + }); + } + } + + const avgLegalRisk = state.data.ownedDatasets.length > 0 + ? state.data.ownedDatasets.reduce((sum, d) => sum + d.legalRisk, 0) / state.data.ownedDatasets.length + : 0; + if (Math.random() < baseProbability * (hasDataPipeline ? 0.25 : 0.5) * avgLegalRisk) { + events.push({ + id: uuid(), type: 'data_contamination', tick: state.meta.tickCount, + severity: 'moderate', + description: 'Copyright holders identified content in training data.', + resolved: true, + impact: {}, + }); + } + + if (Math.random() < baseProbability * (hasRedundancy ? 0.1 : 0.2)) { + const delay = 10 + Math.floor(Math.random() * 20); + events.push({ + id: uuid(), type: 'hardware_failure', tick: state.meta.tickCount, + severity: delay > 20 ? 'major' : 'moderate', + description: 'GPU cluster failure during training. Recovering from checkpoint.', + resolved: true, + impact: { ticksDelayed: delay }, + }); + } + + return events; +} + +function createBaseModel( + pipeline: TrainingPipeline, state: GameState, -): TrainedModel { +): BaseModel { + const { architecture, dataMix } = pipeline; + const compute = pipeline.stages.pretraining.computeAllocated; + const dataTokens = pipeline.stages.pretraining.targetTokens; + const computeFactor = Math.sqrt(compute) * 5; const dataFactor = Math.log10(1 + dataTokens / 1e8) * 10; const researchBonus = state.research.completedResearch.length * 3; const efficiencyBonus = state.research.completedResearch.filter(r => r.includes('efficiency')).length * 5; - const baseCapability = Math.min(95, computeFactor + dataFactor + researchBonus + efficiencyBonus); + let rawCapability = Math.min(95, computeFactor + dataFactor + researchBonus + efficiencyBonus); + + if (architecture.type === 'moe') { + rawCapability = Math.min(98, rawCapability * MOE_CAPABILITY_MULTIPLIER); + } const researcherQuality = state.talent.departments.research.effectiveness; + const capabilities: ModelCapabilities = { - reasoning: clamp(baseCapability * (0.8 + Math.random() * 0.4) * (1 + researcherQuality * 0.2)), - coding: clamp(baseCapability * (0.7 + Math.random() * 0.5)), - creative: clamp(baseCapability * (0.6 + Math.random() * 0.6)), - multimodal: clamp(baseCapability * (0.3 + Math.random() * 0.3)), - agents: clamp(baseCapability * (0.2 + Math.random() * 0.3)), - speed: Math.max(1, 100 - compute * 0.5 + efficiencyBonus * 2), + reasoning: clamp(rawCapability * (0.6 + dataMix.scientific * 0.5 + dataMix.code * 0.3) * (1 + researcherQuality * 0.2)), + coding: clamp(rawCapability * (0.5 + dataMix.code * 1.0)), + creative: clamp(rawCapability * (0.4 + dataMix.books * 0.6 + dataMix.conversation * 0.3)), + math: clamp(rawCapability * (0.3 + dataMix.scientific * 0.7 + dataMix.code * 0.2)), + knowledge: clamp(rawCapability * (0.5 + dataMix.web * 0.3 + dataMix.books * 0.3)), + multimodal: clamp(rawCapability * (dataMix.images * 0.5 + dataMix.video * 0.4 + dataMix.audio * 0.2)), + agents: clamp(rawCapability * (0.2 + dataMix.code * 0.3 + dataMix.conversation * 0.2)), + speed: Math.max(1, 100 - architecture.totalParameters * 0.3 + efficiencyBonus * 2 + (architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER * 10 : 0)), + contextUtilization: Math.min(100, architecture.contextWindow * 0.4), }; + const breakthroughBonuses: Partial> = {}; + for (const event of pipeline.events) { + if ((event.type === 'breakthrough' || event.type === 'emergent_capability') && event.impact.capabilityDomain && event.impact.capabilityBonus) { + const domain = event.impact.capabilityDomain; + breakthroughBonuses[domain] = (breakthroughBonuses[domain] ?? 0) + event.impact.capabilityBonus; + } + } + for (const [domain, bonus] of Object.entries(breakthroughBonuses)) { + const key = domain as keyof ModelCapabilities; + capabilities[key] = clamp(capabilities[key] + bonus); + } + + const completedStages: ('pretraining' | 'sft' | 'alignment')[] = ['pretraining']; + + if (pipeline.stages.sft?.isComplete) { + completedStages.push('sft'); + const sft = pipeline.stages.sft; + for (let i = 0; i < sft.specializations.length; i++) { + const spec = sft.specializations[i]; + const bonuses = SFT_SPECIALIZATION_BONUSES[spec]; + if (!bonuses) continue; + const diminishing = i === 0 ? 1.0 : i === 1 ? 0.7 : 0.4; + for (const [cap, value] of Object.entries(bonuses)) { + const key = cap as keyof ModelCapabilities; + capabilities[key] = clamp(capabilities[key] + value * diminishing); + } + } + } + const safetyResearch = state.research.completedResearch.filter( r => r.includes('alignment') || r.includes('interpretability') || r.includes('constitutional'), ).length; - const safetyScore = Math.min(100, 30 + safetyResearch * 15 + Math.random() * 10); + let overallSafety = Math.min(100, 30 + safetyResearch * 15 + Math.random() * 10); + let refusalRate = overallSafety > 60 ? 0.1 : 0.03; - const safetyPenalty = safetyScore > 60 ? (safetyScore - 60) * 0.1 : 0; - const benchmarkScore = Math.max(0, - (capabilities.reasoning * 0.3 + capabilities.coding * 0.25 + - capabilities.creative * 0.2 + capabilities.multimodal * 0.15 + capabilities.agents * 0.1) - safetyPenalty, - ); + if (pipeline.stages.alignment?.isComplete) { + completedStages.push('alignment'); + const alignment = pipeline.stages.alignment; + const methodConfig = ALIGNMENT_METHODS[alignment.method]; + if (methodConfig) { + const safetyGain = methodConfig.safetyGain * alignment.safetyWeight; + overallSafety = Math.min(100, overallSafety + safetyGain); + refusalRate = methodConfig.baseRefusal * Math.pow(alignment.safetyWeight, 1.5); + const capLoss = methodConfig.capabilityLoss * alignment.safetyWeight * 0.5; + for (const key of Object.keys(capabilities) as (keyof ModelCapabilities)[]) { + if (key !== 'speed' && key !== 'contextUtilization') { + capabilities[key] = clamp(capabilities[key] - capLoss); + } + } + } + } - const parameterCount = Math.pow(10, generation) * (0.5 + Math.random()); + const safetyProfile: SafetyProfile = { + overallSafety, + refusalRate, + harmAvoidance: overallSafety, + instructionFollowing: capabilities.reasoning * 0.8, + honesty: overallSafety * 0.9, + }; return { id: uuid(), - name, - generation, - parameterCount, - trainingDataSize: dataTokens, + familyId: pipeline.familyId, + name: pipeline.modelName, + architecture, + dataMix, capabilities, - safetyScore, - benchmarkScore, - tuning: { preset: 'helpful-safe' }, + safetyProfile, + rawCapability, isDeployed: false, trainedAtTick: state.meta.tickCount, + trainingCostTotal: compute, + trainingStagesCompleted: completedStages, }; } +function processVariantJobs( + state: GameState, + speedMultiplier: number, +): { jobs: VariantCreationJob[]; newVariants: ModelVariant[] } { + const newVariants: ModelVariant[] = []; + const jobs = state.models.variantJobs.map(job => { + if (job.status !== 'active') return job; + const newProgress = job.progressTicks + speedMultiplier; + if (newProgress >= job.totalTicks) { + const baseModel = state.models.baseModels.find(m => m.id === job.baseModelId); + if (baseModel) { + const variant = createVariant(job, baseModel); + newVariants.push(variant); + } + return { ...job, status: 'completed' as const, progressTicks: job.totalTicks }; + } + return { ...job, progressTicks: newProgress }; + }); + return { jobs, newVariants }; +} + +function createVariant(job: VariantCreationJob, base: BaseModel): ModelVariant { + const caps = { ...base.capabilities }; + let costMultiplier = 1.0; + let speedMultiplier = 1.0; + let variantName = base.name; + let arch = { ...base.architecture }; + + if (job.jobType === 'distillation' && 'targetParameters' in job.config) { + const config = job.config; + const sizeRatio = config.targetParameters / base.architecture.totalParameters; + const retention = DISTILLATION_BASE_RETENTION + sizeRatio * 0.25; + for (const key of Object.keys(caps) as (keyof ModelCapabilities)[]) { + caps[key] = clamp(caps[key] * retention); + } + costMultiplier = sizeRatio * 0.8; + speedMultiplier = (1 / sizeRatio) * 0.7; + arch = { ...arch, totalParameters: config.targetParameters, activeParameters: config.targetParameters }; + variantName = config.variantName; + } else if (job.jobType === 'fine-tuning' && 'specialization' in job.config) { + const config = job.config; + const bonuses = SFT_SPECIALIZATION_BONUSES[config.specialization]; + if (bonuses) { + for (const [cap, value] of Object.entries(bonuses)) { + caps[cap as keyof ModelCapabilities] = clamp(caps[cap as keyof ModelCapabilities] + value); + } + } + variantName = config.variantName; + } else if (job.jobType === 'quantization' && 'level' in job.config) { + const config = job.config; + const qConfig = QUANTIZATION_CONFIGS[config.level]; + if (qConfig) { + for (const key of Object.keys(caps) as (keyof ModelCapabilities)[]) { + if (key !== 'speed') caps[key] = clamp(caps[key] * qConfig.qualityRetention); + } + caps.speed = clamp(caps.speed * qConfig.speedMultiplier); + costMultiplier = qConfig.costMultiplier; + speedMultiplier = qConfig.speedMultiplier; + } + variantName = config.variantName; + } + + return { + id: uuid(), + familyId: base.familyId, + baseModelId: base.id, + name: variantName, + variantType: job.jobType === 'distillation' ? 'distilled' : job.jobType === 'fine-tuning' ? 'fine-tuned' : 'quantized', + architecture: arch, + capabilities: caps, + safetyProfile: { ...base.safetyProfile }, + isDeployed: false, + createdAtTick: 0, + quantization: job.jobType === 'quantization' && 'level' in job.config ? job.config.level : undefined, + distillationRetention: job.jobType === 'distillation' && 'targetParameters' in job.config + ? DISTILLATION_BASE_RETENTION + (job.config.targetParameters / base.architecture.totalParameters) * 0.25 + : undefined, + finetuneSpecialization: job.jobType === 'fine-tuning' && 'specialization' in job.config ? job.config.specialization : undefined, + costMultiplier, + speedMultiplier, + }; +} + +function processEvalJobs(state: GameState): { jobs: EvalJob[]; newResults: BenchmarkResult[] } { + const newResults: BenchmarkResult[] = []; + const allModels: (BaseModel | ModelVariant)[] = [ + ...state.models.baseModels, + ...state.models.families.flatMap(f => f.variants), + ]; + + const jobs = state.models.evalJobs.map(job => { + if (job.status !== 'active') return job; + const newProgress = job.progressTicks + 1; + if (newProgress >= job.totalTicks) { + const model = allModels.find(m => m.id === job.modelId); + if (model) { + const results = computeBenchmarkScores(model, job.benchmarkIds, state.meta.tickCount); + newResults.push(...results); + return { ...job, status: 'completed' as const, progressTicks: job.totalTicks, results }; + } + return { ...job, status: 'completed' as const, progressTicks: job.totalTicks }; + } + return { ...job, progressTicks: newProgress }; + }); + return { jobs, newResults }; +} + +function computeBenchmarkScores( + model: BaseModel | ModelVariant, + benchmarkIds: string[], + tick: number, +): BenchmarkResult[] { + const benchmarkMap = new Map(BENCHMARKS.map(b => [b.id, b])); + return benchmarkIds.map(id => { + const bench = benchmarkMap.get(id); + if (!bench) return { benchmarkId: id, modelId: model.id, score: 0, ranAtTick: tick }; + const primary = model.capabilities[bench.primaryCapability] ?? 0; + const secondary = bench.secondaryCapability ? (model.capabilities[bench.secondaryCapability] ?? 0) : 0; + const noise = (Math.random() - 0.5) * 6; + const score = clamp(primary * 0.7 + secondary * 0.3 + noise); + return { benchmarkId: id, modelId: model.id, score, ranAtTick: tick }; + }); +} + +function computeVariantScore(variant: ModelVariant): number { + const c = variant.capabilities; + return (c.reasoning * 0.25 + c.coding * 0.2 + c.creative * 0.15 + c.math * 0.15 + c.knowledge * 0.15 + c.agents * 0.1); +} + function clamp(n: number): number { return Math.min(100, Math.max(0, n)); } diff --git a/packages/game-engine/src/systems/reputationSystem.ts b/packages/game-engine/src/systems/reputationSystem.ts index d9992da..2c8b2af 100644 --- a/packages/game-engine/src/systems/reputationSystem.ts +++ b/packages/game-engine/src/systems/reputationSystem.ts @@ -14,13 +14,9 @@ export interface ReputationTickResult { export function processReputation(state: GameState): ReputationState & { _safetyIncident?: boolean } { let { safetyRecord, publicPerception, employeeSatisfaction, regulatoryStanding } = state.reputation; - const bestModel = state.models.trainedModels - .filter(m => m.isDeployed) - .sort((a, b) => b.benchmarkScore - a.benchmarkScore)[0]; - let safetyIncident = false; - if (bestModel) { - const safetyLevel = bestModel.safetyScore; + if (state.models.bestDeployedSafetyScore > 0) { + const safetyLevel = state.models.bestDeployedSafetyScore; if (safetyLevel < LOW_SAFETY_THRESHOLD && state.meta.tickCount % 60 === 0) { const incidentProb = SAFETY_INCIDENT_PROBABILITY_BASE * (LOW_SAFETY_THRESHOLD - safetyLevel); if (Math.random() < incidentProb) { diff --git a/packages/game-engine/src/tick.ts b/packages/game-engine/src/tick.ts index c027d90..7c55b0e 100644 --- a/packages/game-engine/src/tick.ts +++ b/packages/game-engine/src/tick.ts @@ -40,13 +40,14 @@ export function processTick(state: GameState): Partial { const stateWithInfra = { ...state, infrastructure }; const modelResult = processModels(stateWithInfra); - if (modelResult.modelCompleted) { + for (const completed of modelResult.completedModels) { notifications.push({ title: 'Training Complete', - message: `${modelResult.modelCompleted.name} is ready! Benchmark: ${modelResult.modelCompleted.benchmarkScore.toFixed(1)}/100`, + message: `${completed.name} is ready! Capability: ${completed.rawCapability.toFixed(1)}/100`, type: 'success', }); } + notifications.push(...modelResult.notifications); const stateWithModels = { ...stateWithInfra, models: modelResult.modelsState }; diff --git a/packages/shared/src/constants/gameBalance.ts b/packages/shared/src/constants/gameBalance.ts index 6349ac2..2ec1a98 100644 --- a/packages/shared/src/constants/gameBalance.ts +++ b/packages/shared/src/constants/gameBalance.ts @@ -23,6 +23,66 @@ export const CAPABILITY_FORMULA = { efficiencyWeight: 0.1, }; +export const PRETRAINING_BASE_TICKS = 180; +export const SFT_TIME_FRACTION = 0.10; +export const SFT_COMPUTE_FRACTION = 0.06; +export const ALIGNMENT_TIME_FRACTION = 0.08; +export const ALIGNMENT_COMPUTE_FRACTION = 0.04; +export const CHINCHILLA_OPTIMAL_RATIO = 20; + +export const MAX_CONCURRENT_TRAINING: Record = { + startup: 1, scaleup: 2, bigtech: 4, agi: 8, +}; + +export const DISTILLATION_COMPUTE_FRACTION = 0.15; +export const DISTILLATION_TIME_FRACTION = 0.20; +export const DISTILLATION_BASE_RETENTION = 0.70; +export const FINETUNE_COMPUTE_FRACTION = 0.03; +export const FINETUNE_TIME_FRACTION = 0.08; +export const QUANTIZATION_TICKS = 8; + +export const MOE_CAPABILITY_MULTIPLIER = 1.15; +export const MOE_SPEED_MULTIPLIER = 1.3; +export const PARAMETER_OPTIONS = [1, 3, 7, 13, 30, 70, 130, 300, 700, 1400]; +export const CONTEXT_WINDOW_OPTIONS = [4, 8, 32, 128, 256, 1024]; + +export const EVENT_BASE_PROBABILITY = 0.001; +export const LOSS_SPIKE_DELAY_MIN = 5; +export const LOSS_SPIKE_DELAY_MAX = 20; +export const INSTABILITY_PROGRESS_LOSS_MIN = 0.05; +export const INSTABILITY_PROGRESS_LOSS_MAX = 0.15; +export const BREAKTHROUGH_CAPABILITY_BONUS_MIN = 5; +export const BREAKTHROUGH_CAPABILITY_BONUS_MAX = 15; + +export const EMERGENT_CAPABILITY_THRESHOLDS: Record = { + 10: 'basic-reasoning', + 50: 'chain-of-thought', + 100: 'tool-use', + 500: 'long-horizon-planning', +}; + +export const QUANTIZATION_CONFIGS: Record = { + fp16: { qualityRetention: 1.00, speedMultiplier: 1.0, costMultiplier: 1.0 }, + int8: { qualityRetention: 0.97, speedMultiplier: 1.8, costMultiplier: 0.55 }, + int4: { qualityRetention: 0.90, speedMultiplier: 3.0, costMultiplier: 0.30 }, + int2: { qualityRetention: 0.75, speedMultiplier: 5.0, costMultiplier: 0.15 }, +}; + +export const ALIGNMENT_METHODS: Record = { + rlhf: { safetyGain: 25, capabilityLoss: 5, baseRefusal: 0.10, requiredResearch: 'alignment-research' }, + dpo: { safetyGain: 20, capabilityLoss: 2, baseRefusal: 0.05, requiredResearch: 'interpretability' }, + constitutional: { safetyGain: 30, capabilityLoss: 4, baseRefusal: 0.14, requiredResearch: 'constitutional-ai' }, +}; + +export const SFT_SPECIALIZATION_BONUSES: Record> = { + general: { reasoning: 5, coding: 5, creative: 5, math: 5, knowledge: 5, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 }, + code: { reasoning: 0, coding: 15, creative: -3, math: 8, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 }, + math: { reasoning: 8, coding: 0, creative: -3, math: 15, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 }, + creative: { reasoning: 0, coding: -3, creative: 15, math: 0, knowledge: 5, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 }, + multilingual: { reasoning: 0, coding: 0, creative: 0, math: 0, knowledge: 10, multimodal: 0, agents: 0, speed: -5, contextUtilization: 0 }, + 'tool-use': { reasoning: 0, coding: 8, creative: 0, math: 0, knowledge: 0, multimodal: 0, agents: 15, speed: -5, contextUtilization: 0 }, +}; + export const CONSUMER_BASE_GROWTH = 0.002; export const CONSUMER_QUALITY_GROWTH_MULTIPLIER = 0.01; export const CONSUMER_PRICE_ELASTICITY = -0.5; diff --git a/packages/shared/src/types/gameState.ts b/packages/shared/src/types/gameState.ts index 30061d4..e12afb0 100644 --- a/packages/shared/src/types/gameState.ts +++ b/packages/shared/src/types/gameState.ts @@ -58,4 +58,4 @@ export const INITIAL_SETTINGS: GameSettings = { sfxVolume: 0.7, }; -export const SAVE_VERSION = 5; +export const SAVE_VERSION = 6; diff --git a/packages/shared/src/types/models.ts b/packages/shared/src/types/models.ts index fba6582..40a6601 100644 --- a/packages/shared/src/types/models.ts +++ b/packages/shared/src/types/models.ts @@ -1,51 +1,239 @@ -export interface ModelsState { - trainedModels: TrainedModel[]; - activeTraining: TrainingJob | null; - productLines: ProductLine[]; +import type { Era } from './gameState'; +import type { DataDomain } from './data'; + +export type ArchitectureType = 'dense' | 'moe'; + +export interface ModelArchitecture { + type: ArchitectureType; + totalParameters: number; + activeParameters: number; + expertCount?: number; + expertTopK?: number; + contextWindow: number; + vocabularySize: number; } -export interface TrainedModel { +export type DataMixAllocation = Record; + +export type TrainingStage = 'pretraining' | 'sft' | 'alignment'; +export type TrainingJobStatus = 'active' | 'paused' | 'stalled' | 'completed' | 'failed'; + +export interface TrainingPipeline { id: string; - name: string; - generation: number; - parameterCount: number; - trainingDataSize: number; - capabilities: ModelCapabilities; - safetyScore: number; - benchmarkScore: number; - tuning: ModelTuning; - isDeployed: boolean; - trainedAtTick: number; + familyId: string; + modelName: string; + architecture: ModelArchitecture; + dataMix: DataMixAllocation; + currentStage: TrainingStage; + stages: { + pretraining: PreTrainingConfig; + sft: SFTConfig | null; + alignment: AlignmentConfig | null; + }; + status: TrainingJobStatus; + allocatedComputeFraction: number; + events: TrainingEvent[]; + startedAtTick: number; +} + +export interface PreTrainingConfig { + targetTokens: number; + processedTokens: number; + computeAllocated: number; + progressTicks: number; + totalTicks: number; + lossValue: number; + chinchillaRatio: number; + isComplete: boolean; +} + +export type SFTSpecialization = 'general' | 'code' | 'math' | 'creative' | 'multilingual' | 'tool-use'; + +export interface SFTConfig { + specializations: SFTSpecialization[]; + progressTicks: number; + totalTicks: number; + isComplete: boolean; +} + +export type AlignmentMethod = 'rlhf' | 'dpo' | 'constitutional'; + +export interface AlignmentConfig { + method: AlignmentMethod; + safetyWeight: number; + helpfulnessWeight: number; + progressTicks: number; + totalTicks: number; + isComplete: boolean; +} + +export type TrainingEventType = + | 'loss_spike' + | 'instability' + | 'breakthrough' + | 'emergent_capability' + | 'data_contamination' + | 'hardware_failure'; + +export interface TrainingEvent { + id: string; + type: TrainingEventType; + tick: number; + severity: 'minor' | 'moderate' | 'major'; + description: string; + resolved: boolean; + impact: { + ticksDelayed?: number; + progressLost?: number; + capabilityBonus?: number; + capabilityDomain?: keyof ModelCapabilities; + }; } export interface ModelCapabilities { reasoning: number; coding: number; creative: number; + math: number; + knowledge: number; multimodal: number; agents: number; speed: number; + contextUtilization: number; } -export interface ModelTuning { - preset: TuningPreset; - verbosity?: number; - safetyLevel?: number; - creativity?: number; - speedQuality?: number; - refusalRate?: number; +export interface SafetyProfile { + overallSafety: number; + refusalRate: number; + harmAvoidance: number; + instructionFollowing: number; + honesty: number; } -export type TuningPreset = 'helpful-safe' | 'max-capability' | 'enterprise' | 'creative'; +export interface BaseModel { + id: string; + familyId: string; + name: string; + architecture: ModelArchitecture; + dataMix: DataMixAllocation; + capabilities: ModelCapabilities; + safetyProfile: SafetyProfile; + rawCapability: number; + isDeployed: boolean; + trainedAtTick: number; + trainingCostTotal: number; + trainingStagesCompleted: TrainingStage[]; +} -export interface TrainingJob { - modelName: string; +export type VariantType = 'distilled' | 'fine-tuned' | 'quantized'; +export type QuantizationLevel = 'fp16' | 'int8' | 'int4' | 'int2'; + +export interface ModelVariant { + id: string; + familyId: string; + baseModelId: string; + name: string; + variantType: VariantType; + architecture: ModelArchitecture; + capabilities: ModelCapabilities; + safetyProfile: SafetyProfile; + isDeployed: boolean; + createdAtTick: number; + quantization?: QuantizationLevel; + distillationRetention?: number; + finetuneSpecialization?: SFTSpecialization; + costMultiplier: number; + speedMultiplier: number; +} + +export interface ModelFamily { + id: string; + name: string; generation: number; - allocatedCompute: number; - allocatedDataTokens: number; + baseModelId: string | null; + variants: ModelVariant[]; + createdAtTick: number; +} + +export type VariantJobType = 'distillation' | 'fine-tuning' | 'quantization'; + +export interface VariantCreationJob { + id: string; + familyId: string; + baseModelId: string; + jobType: VariantJobType; + config: DistillationConfig | FineTuneConfig | QuantizationConfig; progressTicks: number; totalTicks: number; - estimatedCapability: number; + allocatedComputeFraction: number; + status: 'active' | 'completed'; +} + +export interface DistillationConfig { + targetParameters: number; + targetArchitecture: ArchitectureType; + variantName: string; +} + +export interface FineTuneConfig { + specialization: SFTSpecialization; + datasetIds: string[]; + variantName: string; +} + +export interface QuantizationConfig { + level: QuantizationLevel; + variantName: string; +} + +export type BenchmarkCategory = 'reasoning' | 'coding' | 'math' | 'knowledge' | 'safety' | 'chat' | 'multimodal' | 'agents'; + +export interface BenchmarkDefinition { + id: string; + name: string; + category: BenchmarkCategory; + description: string; + primaryCapability: keyof ModelCapabilities; + secondaryCapability?: keyof ModelCapabilities; + computeCost: number; + ticksToRun: number; + unlockedAtEra: Era; + marketRelevance: { + consumer: number; + enterprise: number; + developer: number; + research: number; + }; +} + +export interface BenchmarkResult { + benchmarkId: string; + modelId: string; + score: number; + ranAtTick: number; + rank?: number; +} + +export interface EvalJob { + id: string; + modelId: string; + benchmarkIds: string[]; + progressTicks: number; + totalTicks: number; + computeAllocated: number; + status: 'active' | 'completed'; + results: BenchmarkResult[]; +} + +export type ProductLineType = 'text-api' | 'chat-product' | 'chat-free' | 'chat-enterprise' | 'code-api' | 'image' | 'agents-api'; + +export interface ProductPricing { + inputTokenPrice: number; + outputTokenPrice: number; + thinkingTokenBudget: number; + cachingEnabled: boolean; + subscriptionPrice: number; + freeTokenAllowance: number; } export interface ProductLine { @@ -57,20 +245,38 @@ export interface ProductLine { pricing: ProductPricing; } -export type ProductLineType = 'text-api' | 'chat-product' | 'image' | 'code' | 'agents'; - -export interface ProductPricing { - inputTokenPrice: number; - outputTokenPrice: number; - thinkingTokenBudget: number; - cachingEnabled: boolean; - subscriptionPrice: number; - freeTokenAllowance: number; +export interface ModelsState { + families: ModelFamily[]; + baseModels: BaseModel[]; + activeTrainingPipelines: TrainingPipeline[]; + variantJobs: VariantCreationJob[]; + evalJobs: EvalJob[]; + benchmarkResults: BenchmarkResult[]; + productLines: ProductLine[]; + bestDeployedModelScore: number; + bestDeployedSafetyScore: number; } +export const DEFAULT_DATA_MIX: DataMixAllocation = { + web: 0.35, + books: 0.10, + code: 0.15, + scientific: 0.10, + conversation: 0.10, + multilingual: 0.05, + images: 0.05, + video: 0.03, + audio: 0.02, + synthetic: 0.05, +}; + export const INITIAL_MODELS: ModelsState = { - trainedModels: [], - activeTraining: null, + families: [], + baseModels: [], + activeTrainingPipelines: [], + variantJobs: [], + evalJobs: [], + benchmarkResults: [], productLines: [ { id: 'text-api', @@ -103,4 +309,6 @@ export const INITIAL_MODELS: ModelsState = { }, }, ], + bestDeployedModelScore: 0, + bestDeployedSafetyScore: 0, };