diff --git a/apps/web/src/components/dev/StateInspectionTab.tsx b/apps/web/src/components/dev/StateInspectionTab.tsx index 4901cf6..d9a45e7 100644 --- a/apps/web/src/components/dev/StateInspectionTab.tsx +++ b/apps/web/src/components/dev/StateInspectionTab.tsx @@ -62,9 +62,14 @@ export function StateInspectionTab() {
+ + + + + - - + +
@@ -85,6 +90,9 @@ export function StateInspectionTab() { + + +
diff --git a/apps/web/src/pages/InfrastructurePage.tsx b/apps/web/src/pages/InfrastructurePage.tsx index d390cbb..1ef255d 100644 --- a/apps/web/src/pages/InfrastructurePage.tsx +++ b/apps/web/src/pages/InfrastructurePage.tsx @@ -17,6 +17,7 @@ import { estimateNetworkSlots, maxComputeRacks, SWITCH_TIER_CONFIGS, DC_UPGRADE_COST_FRACTION, DC_UPGRADE_INCREMENT, + skuTotalFlops, } from '@ai-tycoon/shared'; import type { DCTier, RackSkuId, LocationId, PipelineStage, Era, @@ -357,7 +358,7 @@ function ClusterFillAllModal({ cluster, money, era, research, onConfirm, onClose }) { const availableSkus = Object.values(RACK_SKU_CONFIGS).filter(s => { if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false; - if (s.requiredResearch && !research.includes(s.requiredResearch)) return false; + if (s.requiredResearch.length > 0 && !s.requiredResearch.every(r => research.includes(r))) return false; return true; }); @@ -540,7 +541,7 @@ function ClusterDetailView({ clusterId }: { clusterId: string }) {
FLOPS: { formatNumber(campus.dataCenters.reduce((s, d) => { const sku = d.rackSkuId ? RACK_SKU_CONFIGS[d.rackSkuId] : null; - return s + (sku ? d.effectiveComputeRacks * sku.flopsPerRack : 0); + return s + (sku ? d.effectiveComputeRacks * skuTotalFlops(sku) : 0); }, 0)) }
@@ -644,7 +645,7 @@ function FillAllDCsModal({ campus, money, era, research, onConfirm, onClose }: { }) { const availableSkus = Object.values(RACK_SKU_CONFIGS).filter(s => { if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false; - if (s.requiredResearch && !research.includes(s.requiredResearch)) return false; + if (s.requiredResearch.length > 0 && !s.requiredResearch.every(r => research.includes(r))) return false; return true; }); @@ -754,7 +755,7 @@ function RetrofitCampusModal({ campus, era, research, onConfirm, onClose }: { const targetSkus = Object.values(RACK_SKU_CONFIGS).filter(s => { if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false; - if (s.requiredResearch && !research.includes(s.requiredResearch)) return false; + if (s.requiredResearch.length > 0 && !s.requiredResearch.every(r => research.includes(r))) return false; return true; }); @@ -810,7 +811,7 @@ function RetrofitCampusModal({ campus, era, research, onConfirm, onClose }: { }`}>
{s.name}
-
{s.flopsPerRack} FLOPS | {s.powerDrawKW} kW | {formatMoney(s.baseCost)}/rack
+
{s.trainingFlops}T / {s.inferenceFlops}I FLOPS | {s.totalVramGB}GB | {s.powerDrawKW} kW | {formatMoney(s.baseCost)}/rack
{isCurrentOnly && Current} {selectedSku === s.id && } @@ -1140,7 +1141,7 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: { const availableSkus = Object.values(RACK_SKU_CONFIGS).filter(s => { if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false; - if (s.requiredResearch && !research.includes(s.requiredResearch)) return false; + if (s.requiredResearch.length > 0 && !s.requiredResearch.every(r => research.includes(r))) return false; if (dc.rackSkuId && dc.rackSkuId !== s.id) return false; return true; }); @@ -1168,9 +1169,10 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: { {/* Stats Grid */} -
+
- + +
@@ -1240,7 +1242,7 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: { setSelectedSku(s.id)} className="accent-accent" />
{s.name}
-
{s.flopsPerRack} FLOPS | {s.powerDrawKW} kW | {formatMoney(s.baseCost)}
+
{s.trainingFlops}T / {s.inferenceFlops}I FLOPS | {s.totalVramGB}GB | {s.powerDrawKW} kW | {formatMoney(s.baseCost)}
))} @@ -1311,14 +1313,14 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: { {Object.values(RACK_SKU_CONFIGS).filter(s => { if (s.id === dc.rackSkuId) return false; if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false; - if (s.requiredResearch && !research.includes(s.requiredResearch)) return false; + if (s.requiredResearch.length > 0 && !s.requiredResearch.every(r => research.includes(r))) return false; return true; }).map(s => ( @@ -1332,6 +1334,24 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: { {/* Upgrades Tab */} {activeTab === 'upgrades' && (
+ {/* Cooling & Network Fabric */} +
+
+ +
+
Cooling Type
+
{dc.coolingType}
+
+
+
+ +
+
Network Fabric
+
{dc.networkFabric}
+
+
+
+ {(['cooling', 'redundancy'] as const).map(upgrade => { const level = upgrade === 'cooling' ? dc.coolingLevel : dc.redundancyLevel; const cost = tierConfig.baseCost * DC_UPGRADE_COST_FRACTION; diff --git a/apps/web/src/pages/ModelsPage.tsx b/apps/web/src/pages/ModelsPage.tsx index 767a43b..73d50f6 100644 --- a/apps/web/src/pages/ModelsPage.tsx +++ b/apps/web/src/pages/ModelsPage.tsx @@ -2,7 +2,7 @@ import { useState } from 'react'; import { Brain, Play, Rocket, Globe, SlidersHorizontal, ChevronDown, ChevronUp } from 'lucide-react'; import { TutorialHint } from '@/components/game/TutorialHint'; import { useGameStore } from '@/store'; -import { formatNumber, formatPercent, formatDuration } from '@ai-tycoon/shared'; +import { formatNumber, formatPercent, formatDuration, VRAM_REQUIREMENTS_BY_GENERATION } from '@ai-tycoon/shared'; import type { TuningPreset } from '@ai-tycoon/shared'; export function ModelsPage() { @@ -10,6 +10,7 @@ export function ModelsPage() { const activeTraining = useGameStore((s) => s.models.activeTraining); const productLines = useGameStore((s) => s.models.productLines); const totalFlops = useGameStore((s) => s.compute.totalFlops); + const totalVramGB = useGameStore((s) => s.compute.totalVramGB); const trainingAlloc = useGameStore((s) => s.compute.trainingAllocation); const totalData = useGameStore((s) => s.data.totalTrainingTokens); const startTraining = useGameStore((s) => s.startTraining); @@ -89,6 +90,14 @@ export function ModelsPage() {
ETA: {formatDuration(activeTraining.totalTicks - activeTraining.progressTicks)}
+ {(() => { + const reqVram = VRAM_REQUIREMENTS_BY_GENERATION[activeTraining.generation] ?? 0; + return reqVram > 0 && totalVramGB < reqVram ? ( +

+ Training stalled — requires {formatNumber(reqVram)} GB VRAM (have {formatNumber(totalVramGB)} GB). Deploy more GPU racks. +

+ ) : null; + })()}
) : (
@@ -102,11 +111,15 @@ export function ModelsPage() { className="w-full bg-surface-800 border border-surface-600 rounded px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-accent/50" />
-
+
Training Compute
{formatNumber(trainingFlops)} FLOPS
+
+
Available VRAM
+
{formatNumber(totalVramGB)} GB
+
Training Data
{formatNumber(totalData)} tokens
diff --git a/apps/web/src/store/index.ts b/apps/web/src/store/index.ts index 9059efc..1930b30 100644 --- a/apps/web/src/store/index.ts +++ b/apps/web/src/store/index.ts @@ -10,6 +10,7 @@ import type { ActiveResearch, OwnedDataset, LocationId, DeploymentCohort, PipelineStage, CampusRetrofitQueue, + CoolingType, NetworkFabric, } from '@ai-tycoon/shared'; import type { FundingRoundType, OverloadPolicy, TuningPreset, ModelTuning } from '@ai-tycoon/shared'; import { @@ -27,6 +28,7 @@ import { LOCATION_CONFIGS, estimateNetworkSlots, maxComputeRacks, uuid, + COOLING_TYPE_CONFIGS, COOLING_ORDER, NETWORK_FABRIC_CONFIGS, FABRIC_ORDER, } from '@ai-tycoon/shared'; import { emptyDCNetworkSummary, emptyCampusNetworkSummary, emptyClusterNetworkSummary, @@ -93,6 +95,8 @@ interface Actions { startCampusRetrofit: (campusId: string, targetSkuId: RackSkuId, maxConcurrent: number) => void; cancelCampusRetrofit: (campusId: string) => void; upgradeDataCenter: (dataCenterId: string, upgrade: 'cooling' | 'redundancy') => void; + upgradeCoolingType: (dataCenterId: string, targetCooling: CoolingType) => void; + upgradeNetworkFabric: (dataCenterId: string, targetFabric: NetworkFabric) => void; startTraining: (job: Omit) => void; deployModel: (modelId: string) => void; setProductPricing: (productLineId: string, field: string, value: number) => void; @@ -197,6 +201,9 @@ export function computeFillForDC( if (dc.rackSkuId !== null && dc.rackSkuId !== skuId) return { qty: 0, cost: 0 }; const sku = RACK_SKU_CONFIGS[skuId]; + const coolingOk = COOLING_ORDER.indexOf(sku.requiredCooling) <= COOLING_ORDER.indexOf(dc.coolingType); + if (!coolingOk) return { qty: 0, cost: 0 }; + const tierConfig = DC_TIER_CONFIGS[dc.tier]; const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier); const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0); @@ -414,6 +421,11 @@ export const useGameStore = create()( retrofitState: null, coolingLevel: 0, redundancyLevel: 0, + coolingType: 'air' as CoolingType, + networkFabric: 'ethernet-100g' as NetworkFabric, + dcTrainingFlops: 0, + dcInferenceFlops: 0, + dcTotalVramGB: 0, }; return { @@ -439,7 +451,10 @@ export const useGameStore = create()( const sku = RACK_SKU_CONFIGS[skuId]; const eraOrder: Era[] = ['startup', 'scaleup', 'bigtech', 'agi']; if (eraOrder.indexOf(s.meta.currentEra) < eraOrder.indexOf(sku.era)) return s; - if (sku.requiredResearch && !s.research.completedResearch.includes(sku.requiredResearch)) return s; + if (sku.requiredResearch.length > 0 && !sku.requiredResearch.every(r => s.research.completedResearch.includes(r))) return s; + + const coolingOk = COOLING_ORDER.indexOf(sku.requiredCooling) <= COOLING_ORDER.indexOf(dc.coolingType); + if (!coolingOk) return s; const tierConfig = DC_TIER_CONFIGS[dc.tier]; const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier); @@ -532,6 +547,11 @@ export const useGameStore = create()( retrofitState: null, coolingLevel: 0, redundancyLevel: 0, + coolingType: 'air' as CoolingType, + networkFabric: 'ethernet-100g' as NetworkFabric, + dcTrainingFlops: 0, + dcInferenceFlops: 0, + dcTotalVramGB: 0, }); } @@ -556,7 +576,10 @@ export const useGameStore = create()( const sku = RACK_SKU_CONFIGS[newSkuId]; const eraOrder: Era[] = ['startup', 'scaleup', 'bigtech', 'agi']; if (eraOrder.indexOf(s.meta.currentEra) < eraOrder.indexOf(sku.era)) return s; - if (sku.requiredResearch && !s.research.completedResearch.includes(sku.requiredResearch)) return s; + if (sku.requiredResearch.length > 0 && !sku.requiredResearch.every(r => s.research.completedResearch.includes(r))) return s; + + const coolingOk = COOLING_ORDER.indexOf(sku.requiredCooling) <= COOLING_ORDER.indexOf(dc.coolingType); + if (!coolingOk) return s; const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0); const totalRacksToRetrofit = dc.computeRacksOnline + pipelineCount; @@ -604,12 +627,14 @@ export const useGameStore = create()( const sku = RACK_SKU_CONFIGS[skuId]; const eraOrder: Era[] = ['startup', 'scaleup', 'bigtech', 'agi']; if (eraOrder.indexOf(s.meta.currentEra) < eraOrder.indexOf(sku.era)) return s; - if (sku.requiredResearch && !s.research.completedResearch.includes(sku.requiredResearch)) return s; + if (sku.requiredResearch.length > 0 && !sku.requiredResearch.every(r => s.research.completedResearch.includes(r))) return s; let remainingMoney = s.economy.money; const dcUpdates = new Map(); for (const dc of found.campus.dataCenters) { + const coolingOk = COOLING_ORDER.indexOf(sku.requiredCooling) <= COOLING_ORDER.indexOf(dc.coolingType); + if (!coolingOk) continue; const { qty, cost } = computeFillForDC(dc, skuId, remainingMoney); if (qty <= 0) continue; @@ -649,7 +674,7 @@ export const useGameStore = create()( const sku = RACK_SKU_CONFIGS[skuId]; const eraOrder: Era[] = ['startup', 'scaleup', 'bigtech', 'agi']; if (eraOrder.indexOf(s.meta.currentEra) < eraOrder.indexOf(sku.era)) return s; - if (sku.requiredResearch && !s.research.completedResearch.includes(sku.requiredResearch)) return s; + if (sku.requiredResearch.length > 0 && !sku.requiredResearch.every(r => s.research.completedResearch.includes(r))) return s; let remainingMoney = s.economy.money; const allDcUpdates = new Map(); @@ -657,6 +682,8 @@ export const useGameStore = create()( for (const campus of cluster.campuses) { if (campus.status !== 'operational') continue; for (const dc of campus.dataCenters) { + const coolingOk = COOLING_ORDER.indexOf(sku.requiredCooling) <= COOLING_ORDER.indexOf(dc.coolingType); + if (!coolingOk) continue; const { qty, cost } = computeFillForDC(dc, skuId, remainingMoney); if (qty <= 0) continue; @@ -701,7 +728,7 @@ export const useGameStore = create()( const sku = RACK_SKU_CONFIGS[targetSkuId]; const eraOrder: Era[] = ['startup', 'scaleup', 'bigtech', 'agi']; if (eraOrder.indexOf(s.meta.currentEra) < eraOrder.indexOf(sku.era)) return s; - if (sku.requiredResearch && !s.research.completedResearch.includes(sku.requiredResearch)) return s; + if (sku.requiredResearch.length > 0 && !sku.requiredResearch.every(r => s.research.completedResearch.includes(r))) return s; const eligible: string[] = []; const skipped: string[] = []; @@ -792,6 +819,58 @@ export const useGameStore = create()( }; }), + upgradeCoolingType: (dataCenterId, targetCooling) => set((s) => { + const found = findDC(s.infrastructure, dataCenterId); + if (!found) return s; + const { dc } = found; + if (dc.status !== 'operational') return s; + + const currentIdx = COOLING_ORDER.indexOf(dc.coolingType); + const targetIdx = COOLING_ORDER.indexOf(targetCooling); + if (targetIdx <= currentIdx) return s; + + // Research gates + if (targetCooling === 'liquid' && !s.research.completedResearch.includes('liquid-cooling-tech')) return s; + if (targetCooling === 'immersion' && !s.research.completedResearch.includes('immersion-cooling-tech')) return s; + + const cost = COOLING_TYPE_CONFIGS[targetCooling].upgradeCost[dc.tier]; + if (s.economy.money < cost) return s; + + return { + economy: { ...s.economy, money: s.economy.money - cost }, + infrastructure: updateDCInInfra(s.infrastructure, dataCenterId, (d) => ({ + ...d, + coolingType: targetCooling, + })), + }; + }), + + upgradeNetworkFabric: (dataCenterId, targetFabric) => set((s) => { + const found = findDC(s.infrastructure, dataCenterId); + if (!found) return s; + const { dc } = found; + if (dc.status !== 'operational') return s; + + const currentIdx = FABRIC_ORDER.indexOf(dc.networkFabric); + const targetIdx = FABRIC_ORDER.indexOf(targetFabric); + if (targetIdx <= currentIdx) return s; + + // InfiniBand requires research + if ((targetFabric === 'infiniband-ndr' || targetFabric === 'infiniband-xdr') + && !s.research.completedResearch.includes('infiniband-networking')) return s; + + const cost = NETWORK_FABRIC_CONFIGS[targetFabric].upgradeCost[dc.tier]; + if (s.economy.money < cost) return s; + + return { + economy: { ...s.economy, money: s.economy.money - cost }, + infrastructure: updateDCInInfra(s.infrastructure, dataCenterId, (d) => ({ + ...d, + networkFabric: targetFabric, + })), + }; + }), + // --- Non-infrastructure actions (unchanged) --- startTraining: (job) => set((s) => ({ @@ -979,7 +1058,7 @@ export const useGameStore = create()( notifications: [{ id: uuid(), title: 'Save Reset', - message: 'Your save was reset due to a major infrastructure redesign — Hypercluster scale! Build clusters, campuses, and data centers.', + message: 'Your save was reset due to a major rack system overhaul — 20 SKUs with training/inference specialization, VRAM, cooling tech, interconnects, and AMD/ASIC vendors!', type: 'info' as const, tick: 0, read: false, diff --git a/packages/game-engine/src/data/techTree.ts b/packages/game-engine/src/data/techTree.ts index ca32836..c2eb182 100644 --- a/packages/game-engine/src/data/techTree.ts +++ b/packages/game-engine/src/data/techTree.ts @@ -25,7 +25,7 @@ export const TECH_TREE: ResearchNode[] = [ { id: 'advanced-gpu-arch', name: 'Advanced GPU Architecture', - description: 'Unlocks procurement of NVIDIA A100 rack configurations.', + description: 'Unlocks NVIDIA A100 PCIe and SXM rack configurations.', era: 'startup', category: 'infrastructure', prerequisites: [], @@ -35,7 +35,7 @@ export const TECH_TREE: ResearchNode[] = [ { id: 'next-gen-gpu', name: 'Next-Gen GPU Architecture', - description: 'Unlocks procurement of NVIDIA H100 rack configurations.', + description: 'Unlocks NVIDIA H100 and H200 rack configurations.', era: 'scaleup', category: 'infrastructure', prerequisites: ['advanced-gpu-arch'], @@ -45,7 +45,7 @@ export const TECH_TREE: ResearchNode[] = [ { id: 'frontier-compute', name: 'Frontier Compute', - description: 'Unlocks procurement of NVIDIA B200 rack configurations.', + description: 'Unlocks NVIDIA B100 and B200 rack configurations.', era: 'bigtech', category: 'infrastructure', prerequisites: ['next-gen-gpu'], @@ -55,13 +55,73 @@ export const TECH_TREE: ResearchNode[] = [ { id: 'custom-silicon', name: 'Custom Silicon Design', - description: 'Design and fabricate custom AI ASICs for maximum efficiency.', + description: 'Unlocks custom Training and Inference ASIC configurations.', era: 'agi', category: 'infrastructure', prerequisites: ['frontier-compute'], cost: { researchPoints: 10, compute: 500, ticks: 900 }, effects: [{ type: 'unlock_rack', target: 'custom', value: 1 }], }, + { + id: 'amd-ecosystem', + name: 'AMD ROCm Ecosystem', + description: 'Adapt software stack for AMD GPUs. Unlocks MI250X, MI300X, MI325X racks.', + era: 'scaleup', + category: 'infrastructure', + prerequisites: ['advanced-gpu-arch'], + cost: { researchPoints: 2, compute: 30, ticks: 200 }, + effects: [{ type: 'unlock_rack', target: 'amd', value: 1 }], + }, + { + id: 'inference-specialization', + name: 'Inference Specialization', + description: 'Optimized inference kernels unlock L4, L40S, and custom inference racks.', + era: 'scaleup', + category: 'infrastructure', + prerequisites: ['quantization'], + cost: { researchPoints: 2, compute: 20, ticks: 150 }, + effects: [{ type: 'unlock_rack', target: 'inference', value: 1 }], + }, + { + id: 'rack-scale-compute', + name: 'Rack-Scale Computing', + description: 'Full NVLink domain architecture. Unlocks GB200 NVL72 — 72 GPUs in a single rack.', + era: 'agi', + category: 'infrastructure', + prerequisites: ['frontier-compute'], + cost: { researchPoints: 8, compute: 400, ticks: 720 }, + effects: [{ type: 'unlock_rack', target: 'gb200-nvl72', value: 1 }], + }, + { + id: 'liquid-cooling-tech', + name: 'Liquid Cooling Systems', + description: 'Enables liquid cooling upgrades for data centers. Required for SXM and high-power racks.', + era: 'scaleup', + category: 'infrastructure', + prerequisites: ['advanced-cooling'], + cost: { researchPoints: 2, compute: 25, ticks: 180 }, + effects: [{ type: 'unlock_feature', target: 'liquid-cooling', value: 1 }], + }, + { + id: 'immersion-cooling-tech', + name: 'Immersion Cooling', + description: 'Enables immersion cooling for maximum power density. Required for custom ASICs.', + era: 'bigtech', + category: 'infrastructure', + prerequisites: ['liquid-cooling-tech'], + cost: { researchPoints: 5, compute: 100, ticks: 400 }, + effects: [{ type: 'unlock_feature', target: 'immersion-cooling', value: 1 }], + }, + { + id: 'infiniband-networking', + name: 'InfiniBand Networking', + description: 'High-bandwidth interconnect for distributed training. Unlocks InfiniBand fabric upgrades.', + era: 'scaleup', + category: 'infrastructure', + prerequisites: ['network-engineering-i'], + cost: { researchPoints: 3, compute: 40, ticks: 240 }, + effects: [{ type: 'unlock_feature', target: 'infiniband', value: 1 }], + }, { id: 'dc-engineering-ii', name: 'DC Engineering II', diff --git a/packages/game-engine/src/systems/computeSystem.ts b/packages/game-engine/src/systems/computeSystem.ts index b4b63fe..d56fab8 100644 --- a/packages/game-engine/src/systems/computeSystem.ts +++ b/packages/game-engine/src/systems/computeSystem.ts @@ -3,19 +3,44 @@ import { FLOPS_TO_TOKENS_MULTIPLIER } from '@ai-tycoon/shared'; export interface CapacityResult { totalFlops: number; + totalTrainingFlops: number; + totalInferenceFlops: number; + totalVramGB: number; trainingAllocation: number; inferenceAllocation: number; + effectiveTrainingFlops: number; + effectiveInferenceFlops: number; tokensPerSecondCapacity: number; } export function computeCapacity(state: GameState, infrastructure: InfrastructureState): CapacityResult { - const totalFlops = infrastructure.totalFlops; + const { totalTrainingFlops, totalInferenceFlops, totalVramGB } = infrastructure; const trainingAllocation = state.compute.trainingAllocation; const inferenceAllocation = 1 - trainingAllocation; - const inferenceFlops = totalFlops * inferenceAllocation; - const tokensPerSecondCapacity = inferenceFlops * FLOPS_TO_TOKENS_MULTIPLIER; - return { totalFlops, trainingAllocation, inferenceAllocation, tokensPerSecondCapacity }; + // Training hardware can do inference at ~50% efficiency + // Inference hardware can do training at ~30% efficiency (no NVLink, poor scaling) + const effectiveTrainingFlops = + totalTrainingFlops * trainingAllocation + + totalInferenceFlops * trainingAllocation * 0.3; + + const effectiveInferenceFlops = + totalInferenceFlops * inferenceAllocation + + totalTrainingFlops * inferenceAllocation * 0.5; + + const tokensPerSecondCapacity = effectiveInferenceFlops * FLOPS_TO_TOKENS_MULTIPLIER; + + return { + totalFlops: totalTrainingFlops + totalInferenceFlops, + totalTrainingFlops, + totalInferenceFlops, + totalVramGB, + trainingAllocation, + inferenceAllocation, + effectiveTrainingFlops, + effectiveInferenceFlops, + tokensPerSecondCapacity, + }; } export function finalizeCompute(capacity: CapacityResult, totalTokenDemand: number): ComputeState { diff --git a/packages/game-engine/src/systems/infrastructureSystem.ts b/packages/game-engine/src/systems/infrastructureSystem.ts index 2233f62..f1fa0ed 100644 --- a/packages/game-engine/src/systems/infrastructureSystem.ts +++ b/packages/game-engine/src/systems/infrastructureSystem.ts @@ -2,7 +2,7 @@ import type { GameState, InfrastructureState, Cluster, Campus, DataCenter, DeploymentCohort, PipelineStage, RackSkuId, NetworkSwitch, SwitchTier, DCNetworkSummary, CampusNetworkSummary, ClusterNetworkSummary, - CampusRetrofitQueue, DCTier, + CampusRetrofitQueue, DCTier, IntraNodeInterconnect, NetworkFabric, RackSkuConfig, } from '@ai-tycoon/shared'; import { LOCATION_CONFIGS, @@ -19,6 +19,8 @@ import { T3_COUNT_PER_DC_TIER, SWITCH_REPAIR_COST_FRACTION, NETWORK_DEGRADATION, + COOLING_TYPE_CONFIGS, + NETWORK_FABRIC_CONFIGS, estimateNetworkSlots, } from '@ai-tycoon/shared'; import type { TickNotification } from '../tick'; @@ -435,6 +437,30 @@ function processNetworkTick( return { switchRepairCosts, notifications, dirty }; } +// --- Interconnect Training Multiplier --- + +const INTRA_NODE_BONUS: Record = { + 'pcie-gen4': 0.0, + 'pcie-gen5': 0.05, + 'nvlink-3': 0.15, + 'nvlink-4': 0.25, + 'nvlink-5': 0.35, + 'nvlink-domain': 0.50, + 'infinity-fabric': 0.10, + 'custom-mesh': 0.40, +}; + +function computeInterconnectMultiplier( + sku: RackSkuConfig, + rackCount: number, + fabric: NetworkFabric, +): number { + if (rackCount <= 1) return 1.0; + const intra = INTRA_NODE_BONUS[sku.intraNodeInterconnect] ?? 0; + const fabricBonus = NETWORK_FABRIC_CONFIGS[fabric].trainingScalingBonus; + return Math.min(1.0, 0.6 + intra + fabricBonus); +} + // --- Main Infrastructure Tick --- export function processInfrastructure(state: GameState): InfraTickResult { @@ -463,6 +489,9 @@ export function processInfrastructure(state: GameState): InfraTickResult { notifications.push(...netResult.notifications); let totalFlops = 0; + let totalTrainingFlops = 0; + let totalInferenceFlops = 0; + let totalVramGB = 0; let totalUptime = 0; let totalRackCount = 0; let totalComputeRackCount = 0; @@ -684,14 +713,23 @@ export function processInfrastructure(state: GameState): InfraTickResult { let usedPowerKW = 0; let dcFlops = 0; + let dcTrainingFlops = 0; + let dcInferenceFlops = 0; + let dcTotalVramGB = 0; if (dc.rackSkuId && computeRacksOnline > 0) { const sku = RACK_SKU_CONFIGS[dc.rackSkuId]; usedPowerKW = computeRacksOnline * sku.powerDrawKW; - dcFlops = effectiveComputeRacks * sku.flopsPerRack * networkSummary.effectiveFlopsFraction; + const bwFraction = networkSummary.effectiveFlopsFraction; + const interconnectMult = computeInterconnectMultiplier(sku, effectiveComputeRacks, dc.networkFabric); + dcTrainingFlops = effectiveComputeRacks * sku.trainingFlops * bwFraction * interconnectMult; + dcInferenceFlops = effectiveComputeRacks * sku.inferenceFlops * bwFraction; + dcTotalVramGB = computeRacksOnline * sku.totalVramGB; + dcFlops = dcTrainingFlops + dcInferenceFlops; } + const pue = COOLING_TYPE_CONFIGS[dc.coolingType].pueMultiplier; const energyCostPerTick = (tierConfig.baseEnergyCostPerTick + usedPowerKW * BASE_ENERGY_COST_PER_FLOP) - * location.energyCostMultiplier; + * location.energyCostMultiplier * pue; const maintenanceCostPerTick = totalRacksInDc * BASE_MAINTENANCE_PER_RACK; const currentUptime = totalRacksInDc > 0 ? effectiveComputeRacks / totalRacksInDc : 1; @@ -703,6 +741,9 @@ export function processInfrastructure(state: GameState): InfraTickResult { } totalFlops += dcFlops; + totalTrainingFlops += dcTrainingFlops; + totalInferenceFlops += dcInferenceFlops; + totalVramGB += dcTotalVramGB; totalRackCount += totalRacksInDc + netSlots; totalComputeRackCount += totalRacksInDc; totalDataCenterCount++; @@ -714,6 +755,7 @@ export function processInfrastructure(state: GameState): InfraTickResult { deploymentCohorts: updatedCohorts, networkSummary, effectiveComputeRacks, usedSlots, usedPowerKW, energyCostPerTick, maintenanceCostPerTick, currentUptime, + dcTrainingFlops, dcInferenceFlops, dcTotalVramGB, }; }); @@ -788,6 +830,9 @@ export function processInfrastructure(state: GameState): InfraTickResult { clusters, switchRegistry: registry, totalFlops, + totalTrainingFlops, + totalInferenceFlops, + totalVramGB, totalUptime: dcWithRacks > 0 ? totalUptime / dcWithRacks : 1, totalRackCount, totalComputeRackCount, diff --git a/packages/game-engine/src/systems/modelSystem.ts b/packages/game-engine/src/systems/modelSystem.ts index ec534f0..4352487 100644 --- a/packages/game-engine/src/systems/modelSystem.ts +++ b/packages/game-engine/src/systems/modelSystem.ts @@ -1,5 +1,5 @@ import type { GameState, ModelsState, TrainedModel, ModelCapabilities } from '@ai-tycoon/shared'; -import { uuid } from '@ai-tycoon/shared'; +import { uuid, VRAM_REQUIREMENTS_BY_GENERATION } from '@ai-tycoon/shared'; export interface ModelTickResult { modelsState: ModelsState; @@ -12,6 +12,11 @@ export function processModels(state: GameState): ModelTickResult { return { modelsState: state.models, modelCompleted: null }; } + const requiredVram = VRAM_REQUIREMENTS_BY_GENERATION[active.generation] ?? 0; + if (requiredVram > 0 && state.compute.totalVramGB < requiredVram) { + return { modelsState: state.models, modelCompleted: null }; + } + const researcherBoost = state.talent.departments.research.headcount * state.talent.departments.research.effectiveness; const engineerBoost = state.talent.departments.engineering.headcount * diff --git a/packages/shared/src/constants/gameBalance.ts b/packages/shared/src/constants/gameBalance.ts index 14a7496..6349ac2 100644 --- a/packages/shared/src/constants/gameBalance.ts +++ b/packages/shared/src/constants/gameBalance.ts @@ -1,4 +1,4 @@ -import type { DCTier, DCTierConfig, RackSkuId, RackSkuConfig, SwitchTier, SwitchTierConfig, CampusTierCost, ClusterCostConfig } from '../types/infrastructure'; +import type { DCTier, DCTierConfig, RackSkuId, RackSkuConfig, SwitchTier, SwitchTierConfig, CampusTierCost, ClusterCostConfig, CoolingType, CoolingTypeConfig, NetworkFabric, NetworkFabricConfig } from '../types/infrastructure'; export const TICK_INTERVAL_MS = 1000; export const MAX_OFFLINE_TICKS = 86_400; @@ -209,18 +209,87 @@ export function maxComputeRacks(totalSlots: number, dcTier: DCTier): number { return lo; } +// --- Cooling Type Configs --- + +export const COOLING_TYPE_CONFIGS: Record = { + air: { + name: 'Air Cooling', + upgradeCost: { small: 0, medium: 0, large: 0, mega: 0 }, + upgradeTimeTicks: 0, + pueMultiplier: 1.0, + }, + liquid: { + name: 'Liquid Cooling', + upgradeCost: { small: 200_000, medium: 600_000, large: 2_000_000, mega: 6_000_000 }, + upgradeTimeTicks: 300, + pueMultiplier: 0.85, + }, + immersion: { + name: 'Immersion Cooling', + upgradeCost: { small: 500_000, medium: 1_500_000, large: 5_000_000, mega: 15_000_000 }, + upgradeTimeTicks: 600, + pueMultiplier: 0.70, + }, +}; + +export const COOLING_ORDER: CoolingType[] = ['air', 'liquid', 'immersion']; + +// --- Network Fabric Configs --- + +export const NETWORK_FABRIC_CONFIGS: Record = { + 'ethernet-100g': { + name: '100G Ethernet', + upgradeCost: { small: 0, medium: 0, large: 0, mega: 0 }, + upgradeTimeTicks: 0, + trainingScalingBonus: 0, + }, + 'ethernet-400g': { + name: '400G Ethernet', + upgradeCost: { small: 100_000, medium: 300_000, large: 1_000_000, mega: 3_000_000 }, + upgradeTimeTicks: 200, + trainingScalingBonus: 0.10, + }, + 'infiniband-ndr': { + name: 'InfiniBand NDR', + upgradeCost: { small: 300_000, medium: 900_000, large: 3_000_000, mega: 9_000_000 }, + upgradeTimeTicks: 400, + trainingScalingBonus: 0.25, + }, + 'infiniband-xdr': { + name: 'InfiniBand XDR', + upgradeCost: { small: 800_000, medium: 2_400_000, large: 8_000_000, mega: 24_000_000 }, + upgradeTimeTicks: 600, + trainingScalingBonus: 0.40, + }, +}; + +export const FABRIC_ORDER: NetworkFabric[] = ['ethernet-100g', 'ethernet-400g', 'infiniband-ndr', 'infiniband-xdr']; + // --- Rack SKU Configs --- +export function skuTotalFlops(sku: RackSkuConfig): number { + return sku.trainingFlops + sku.inferenceFlops; +} + export const RACK_SKU_CONFIGS: Record = { + // === STARTUP ERA === 'consumer-x4': { id: 'consumer-x4', name: 'Consumer GPU x4', era: 'startup', + gpuVendor: 'nvidia', + gpuModel: 'RTX Consumer', gpuCount: 4, - flopsPerRack: 4, + trainingFlops: 2, + inferenceFlops: 4, + vramPerGpuGB: 12, + totalVramGB: 48, + requiredCooling: 'air', + intraNodeInterconnect: 'pcie-gen4', + intraNodeBandwidthGBps: 64, powerDrawKW: 0.4, baseCost: 3_200, - requiredResearch: null, + requiredResearch: [], pipelineTimeTicks: { manufacturing: 20, receiving: 10, installation: 15, testing: 15 }, testFailureRate: 0.05, productionFailureRate: 0.0002, @@ -230,11 +299,19 @@ export const RACK_SKU_CONFIGS: Record = { id: 't4-x4', name: 'NVIDIA T4 x4', era: 'startup', + gpuVendor: 'nvidia', + gpuModel: 'T4', gpuCount: 4, - flopsPerRack: 32, - powerDrawKW: 1.2, - baseCost: 20_000, - requiredResearch: null, + trainingFlops: 8, + inferenceFlops: 32, + vramPerGpuGB: 16, + totalVramGB: 64, + requiredCooling: 'air', + intraNodeInterconnect: 'pcie-gen4', + intraNodeBandwidthGBps: 64, + powerDrawKW: 0.5, + baseCost: 12_000, + requiredResearch: [], pipelineTimeTicks: { manufacturing: 30, receiving: 15, installation: 25, testing: 20 }, testFailureRate: 0.07, productionFailureRate: 0.0003, @@ -243,115 +320,370 @@ export const RACK_SKU_CONFIGS: Record = { 't4-x8': { id: 't4-x8', name: 'NVIDIA T4 x8', - era: 'scaleup', + era: 'startup', + gpuVendor: 'nvidia', + gpuModel: 'T4', gpuCount: 8, - flopsPerRack: 64, - powerDrawKW: 2.4, - baseCost: 38_000, - requiredResearch: null, + trainingFlops: 16, + inferenceFlops: 64, + vramPerGpuGB: 16, + totalVramGB: 128, + requiredCooling: 'air', + intraNodeInterconnect: 'pcie-gen4', + intraNodeBandwidthGBps: 64, + powerDrawKW: 1.0, + baseCost: 22_000, + requiredResearch: [], pipelineTimeTicks: { manufacturing: 40, receiving: 20, installation: 30, testing: 30 }, testFailureRate: 0.08, productionFailureRate: 0.0003, repairCostFraction: 0.12, }, - 'a100-x4': { - id: 'a100-x4', - name: 'NVIDIA A100 x4', + + // === SCALEUP ERA === + 'l4-x8': { + id: 'l4-x8', + name: 'NVIDIA L4 x8', era: 'scaleup', + gpuVendor: 'nvidia', + gpuModel: 'L4', + gpuCount: 8, + trainingFlops: 30, + inferenceFlops: 180, + vramPerGpuGB: 24, + totalVramGB: 192, + requiredCooling: 'air', + intraNodeInterconnect: 'pcie-gen5', + intraNodeBandwidthGBps: 128, + powerDrawKW: 0.8, + baseCost: 28_000, + requiredResearch: ['inference-specialization'], + pipelineTimeTicks: { manufacturing: 35, receiving: 15, installation: 25, testing: 25 }, + testFailureRate: 0.07, + productionFailureRate: 0.0002, + repairCostFraction: 0.10, + }, + 'a100-pcie-x4': { + id: 'a100-pcie-x4', + name: 'A100 PCIe x4', + era: 'scaleup', + gpuVendor: 'nvidia', + gpuModel: 'A100 PCIe 80GB', gpuCount: 4, - flopsPerRack: 160, - powerDrawKW: 4.0, - baseCost: 60_000, - requiredResearch: 'advanced-gpu-arch', + trainingFlops: 100, + inferenceFlops: 140, + vramPerGpuGB: 80, + totalVramGB: 320, + requiredCooling: 'air', + intraNodeInterconnect: 'pcie-gen4', + intraNodeBandwidthGBps: 64, + powerDrawKW: 2.0, + baseCost: 55_000, + requiredResearch: ['advanced-gpu-arch'], pipelineTimeTicks: { manufacturing: 60, receiving: 25, installation: 50, testing: 45 }, testFailureRate: 0.10, productionFailureRate: 0.0004, repairCostFraction: 0.15, }, - 'a100-x8': { - id: 'a100-x8', - name: 'NVIDIA A100 x8', + 'a100-sxm-x8': { + id: 'a100-sxm-x8', + name: 'A100 SXM x8', era: 'scaleup', + gpuVendor: 'nvidia', + gpuModel: 'A100 SXM 80GB', gpuCount: 8, - flopsPerRack: 320, - powerDrawKW: 8.0, + trainingFlops: 320, + inferenceFlops: 200, + vramPerGpuGB: 80, + totalVramGB: 640, + requiredCooling: 'liquid', + intraNodeInterconnect: 'nvlink-3', + intraNodeBandwidthGBps: 600, + powerDrawKW: 5.0, baseCost: 115_000, - requiredResearch: 'advanced-gpu-arch', + requiredResearch: ['advanced-gpu-arch'], pipelineTimeTicks: { manufacturing: 70, receiving: 30, installation: 55, testing: 55 }, testFailureRate: 0.12, productionFailureRate: 0.0004, repairCostFraction: 0.15, }, - 'h100-x4': { - id: 'h100-x4', - name: 'NVIDIA H100 x4', - era: 'bigtech', - gpuCount: 4, - flopsPerRack: 480, + 'mi250x-x8': { + id: 'mi250x-x8', + name: 'AMD MI250X x8', + era: 'scaleup', + gpuVendor: 'amd', + gpuModel: 'MI250X', + gpuCount: 8, + trainingFlops: 240, + inferenceFlops: 160, + vramPerGpuGB: 128, + totalVramGB: 1024, + requiredCooling: 'air', + intraNodeInterconnect: 'infinity-fabric', + intraNodeBandwidthGBps: 400, powerDrawKW: 5.6, - baseCost: 140_000, - requiredResearch: 'next-gen-gpu', - pipelineTimeTicks: { manufacturing: 80, receiving: 30, installation: 65, testing: 65 }, + baseCost: 80_000, + requiredResearch: ['amd-ecosystem'], + pipelineTimeTicks: { manufacturing: 75, receiving: 30, installation: 60, testing: 60 }, testFailureRate: 0.15, productionFailureRate: 0.0005, repairCostFraction: 0.18, }, - 'h100-x8': { - id: 'h100-x8', - name: 'NVIDIA H100 x8', + + // === BIG TECH ERA === + 'h100-pcie-x4': { + id: 'h100-pcie-x4', + name: 'H100 PCIe x4', era: 'bigtech', + gpuVendor: 'nvidia', + gpuModel: 'H100 PCIe 80GB', + gpuCount: 4, + trainingFlops: 180, + inferenceFlops: 480, + vramPerGpuGB: 80, + totalVramGB: 320, + requiredCooling: 'air', + intraNodeInterconnect: 'pcie-gen5', + intraNodeBandwidthGBps: 128, + powerDrawKW: 1.8, + baseCost: 130_000, + requiredResearch: ['next-gen-gpu'], + pipelineTimeTicks: { manufacturing: 80, receiving: 30, installation: 65, testing: 65 }, + testFailureRate: 0.12, + productionFailureRate: 0.0004, + repairCostFraction: 0.15, + }, + 'h100-sxm-x8': { + id: 'h100-sxm-x8', + name: 'H100 SXM x8', + era: 'bigtech', + gpuVendor: 'nvidia', + gpuModel: 'H100 SXM 80GB', gpuCount: 8, - flopsPerRack: 960, - powerDrawKW: 11.2, - baseCost: 270_000, - requiredResearch: 'next-gen-gpu', + trainingFlops: 960, + inferenceFlops: 600, + vramPerGpuGB: 80, + totalVramGB: 640, + requiredCooling: 'liquid', + intraNodeInterconnect: 'nvlink-4', + intraNodeBandwidthGBps: 900, + powerDrawKW: 7.0, + baseCost: 280_000, + requiredResearch: ['next-gen-gpu'], pipelineTimeTicks: { manufacturing: 90, receiving: 35, installation: 75, testing: 80 }, testFailureRate: 0.18, productionFailureRate: 0.0005, repairCostFraction: 0.18, }, - 'b200-x4': { - id: 'b200-x4', - name: 'NVIDIA B200 x4', + 'h200-sxm-x8': { + id: 'h200-sxm-x8', + name: 'H200 SXM x8', era: 'bigtech', - gpuCount: 4, - flopsPerRack: 1600, - powerDrawKW: 8.0, + gpuVendor: 'nvidia', + gpuModel: 'H200 SXM 141GB', + gpuCount: 8, + trainingFlops: 1000, + inferenceFlops: 650, + vramPerGpuGB: 141, + totalVramGB: 1128, + requiredCooling: 'liquid', + intraNodeInterconnect: 'nvlink-4', + intraNodeBandwidthGBps: 900, + powerDrawKW: 7.0, + baseCost: 340_000, + requiredResearch: ['next-gen-gpu'], + pipelineTimeTicks: { manufacturing: 95, receiving: 35, installation: 80, testing: 85 }, + testFailureRate: 0.18, + productionFailureRate: 0.0005, + repairCostFraction: 0.18, + }, + 'mi300x-x8': { + id: 'mi300x-x8', + name: 'AMD MI300X x8', + era: 'bigtech', + gpuVendor: 'amd', + gpuModel: 'MI300X 192GB', + gpuCount: 8, + trainingFlops: 700, + inferenceFlops: 450, + vramPerGpuGB: 192, + totalVramGB: 1536, + requiredCooling: 'liquid', + intraNodeInterconnect: 'infinity-fabric', + intraNodeBandwidthGBps: 500, + powerDrawKW: 7.5, baseCost: 200_000, - requiredResearch: 'frontier-compute', + requiredResearch: ['amd-ecosystem'], + pipelineTimeTicks: { manufacturing: 100, receiving: 35, installation: 75, testing: 80 }, + testFailureRate: 0.20, + productionFailureRate: 0.0006, + repairCostFraction: 0.20, + }, + 'l40s-x8': { + id: 'l40s-x8', + name: 'NVIDIA L40S x8', + era: 'bigtech', + gpuVendor: 'nvidia', + gpuModel: 'L40S 48GB', + gpuCount: 8, + trainingFlops: 120, + inferenceFlops: 900, + vramPerGpuGB: 48, + totalVramGB: 384, + requiredCooling: 'air', + intraNodeInterconnect: 'pcie-gen5', + intraNodeBandwidthGBps: 128, + powerDrawKW: 3.5, + baseCost: 160_000, + requiredResearch: ['inference-specialization'], + pipelineTimeTicks: { manufacturing: 70, receiving: 25, installation: 55, testing: 50 }, + testFailureRate: 0.10, + productionFailureRate: 0.0003, + repairCostFraction: 0.12, + }, + 'b100-x8': { + id: 'b100-x8', + name: 'NVIDIA B100 x8', + era: 'bigtech', + gpuVendor: 'nvidia', + gpuModel: 'B100 192GB', + gpuCount: 8, + trainingFlops: 1800, + inferenceFlops: 1100, + vramPerGpuGB: 192, + totalVramGB: 1536, + requiredCooling: 'air', + intraNodeInterconnect: 'nvlink-5', + intraNodeBandwidthGBps: 1800, + powerDrawKW: 7.0, + baseCost: 320_000, + requiredResearch: ['frontier-compute'], pipelineTimeTicks: { manufacturing: 100, receiving: 40, installation: 80, testing: 80 }, testFailureRate: 0.20, productionFailureRate: 0.0006, repairCostFraction: 0.20, }, - 'b200-x8': { - id: 'b200-x8', - name: 'NVIDIA B200 x8', + + // === AGI ERA === + 'b200-sxm-x8': { + id: 'b200-sxm-x8', + name: 'B200 SXM x8', era: 'agi', + gpuVendor: 'nvidia', + gpuModel: 'B200 SXM 192GB', gpuCount: 8, - flopsPerRack: 3200, - powerDrawKW: 16.0, - baseCost: 380_000, - requiredResearch: 'frontier-compute', + trainingFlops: 3200, + inferenceFlops: 1800, + vramPerGpuGB: 192, + totalVramGB: 1536, + requiredCooling: 'liquid', + intraNodeInterconnect: 'nvlink-5', + intraNodeBandwidthGBps: 1800, + powerDrawKW: 10.0, + baseCost: 400_000, + requiredResearch: ['frontier-compute'], pipelineTimeTicks: { manufacturing: 120, receiving: 45, installation: 95, testing: 100 }, testFailureRate: 0.22, productionFailureRate: 0.0006, repairCostFraction: 0.20, }, - 'custom-x8': { - id: 'custom-x8', - name: 'Custom ASIC x8', + 'gb200-nvl72': { + id: 'gb200-nvl72', + name: 'GB200 NVL72', era: 'agi', + gpuVendor: 'nvidia', + gpuModel: 'B200 NVL72', + gpuCount: 72, + trainingFlops: 36_000, + inferenceFlops: 18_000, + vramPerGpuGB: 192, + totalVramGB: 13_824, + requiredCooling: 'liquid', + intraNodeInterconnect: 'nvlink-domain', + intraNodeBandwidthGBps: 14_400, + powerDrawKW: 120.0, + baseCost: 2_500_000, + requiredResearch: ['frontier-compute', 'rack-scale-compute'], + pipelineTimeTicks: { manufacturing: 180, receiving: 60, installation: 120, testing: 120 }, + testFailureRate: 0.28, + productionFailureRate: 0.0008, + repairCostFraction: 0.15, + }, + 'mi325x-x8': { + id: 'mi325x-x8', + name: 'AMD MI325X x8', + era: 'agi', + gpuVendor: 'amd', + gpuModel: 'MI325X 256GB', gpuCount: 8, - flopsPerRack: 6400, + trainingFlops: 2400, + inferenceFlops: 1400, + vramPerGpuGB: 256, + totalVramGB: 2048, + requiredCooling: 'liquid', + intraNodeInterconnect: 'infinity-fabric', + intraNodeBandwidthGBps: 600, + powerDrawKW: 7.5, + baseCost: 280_000, + requiredResearch: ['amd-ecosystem'], + pipelineTimeTicks: { manufacturing: 130, receiving: 40, installation: 90, testing: 95 }, + testFailureRate: 0.22, + productionFailureRate: 0.0006, + repairCostFraction: 0.20, + }, + 'custom-training-x8': { + id: 'custom-training-x8', + name: 'Training ASIC x8', + era: 'agi', + gpuVendor: 'custom', + gpuModel: 'Custom Training ASIC', + gpuCount: 8, + trainingFlops: 8000, + inferenceFlops: 2000, + vramPerGpuGB: 256, + totalVramGB: 2048, + requiredCooling: 'immersion', + intraNodeInterconnect: 'custom-mesh', + intraNodeBandwidthGBps: 3200, powerDrawKW: 20.0, - baseCost: 640_000, - requiredResearch: 'custom-silicon', + baseCost: 700_000, + requiredResearch: ['custom-silicon'], pipelineTimeTicks: { manufacturing: 140, receiving: 50, installation: 100, testing: 110 }, testFailureRate: 0.25, productionFailureRate: 0.0008, repairCostFraction: 0.20, }, + 'custom-inference-x16': { + id: 'custom-inference-x16', + name: 'Inference ASIC x16', + era: 'agi', + gpuVendor: 'custom', + gpuModel: 'Custom Inference ASIC', + gpuCount: 16, + trainingFlops: 800, + inferenceFlops: 12_000, + vramPerGpuGB: 32, + totalVramGB: 512, + requiredCooling: 'air', + intraNodeInterconnect: 'custom-mesh', + intraNodeBandwidthGBps: 1600, + powerDrawKW: 5.0, + baseCost: 500_000, + requiredResearch: ['custom-silicon', 'inference-specialization'], + pipelineTimeTicks: { manufacturing: 130, receiving: 45, installation: 90, testing: 100 }, + testFailureRate: 0.22, + productionFailureRate: 0.0007, + repairCostFraction: 0.18, + }, +}; + +export const VRAM_REQUIREMENTS_BY_GENERATION: Record = { + 1: 48, + 2: 192, + 3: 640, + 4: 1536, + 5: 4096, + 6: 16384, }; // --- Pipeline & Infrastructure Constants --- diff --git a/packages/shared/src/types/compute.ts b/packages/shared/src/types/compute.ts index 1f3c45d..52df024 100644 --- a/packages/shared/src/types/compute.ts +++ b/packages/shared/src/types/compute.ts @@ -1,7 +1,12 @@ export interface ComputeState { totalFlops: number; + totalTrainingFlops: number; + totalInferenceFlops: number; + totalVramGB: number; trainingAllocation: number; inferenceAllocation: number; + effectiveTrainingFlops: number; + effectiveInferenceFlops: number; inferenceUtilization: number; tokensPerSecondCapacity: number; tokensPerSecondDemand: number; @@ -9,8 +14,13 @@ export interface ComputeState { export const INITIAL_COMPUTE: ComputeState = { totalFlops: 0, + totalTrainingFlops: 0, + totalInferenceFlops: 0, + totalVramGB: 0, trainingAllocation: 0.5, inferenceAllocation: 0.5, + effectiveTrainingFlops: 0, + effectiveInferenceFlops: 0, inferenceUtilization: 0, tokensPerSecondCapacity: 0, tokensPerSecondDemand: 0, diff --git a/packages/shared/src/types/gameState.ts b/packages/shared/src/types/gameState.ts index 3c32416..30061d4 100644 --- a/packages/shared/src/types/gameState.ts +++ b/packages/shared/src/types/gameState.ts @@ -58,4 +58,4 @@ export const INITIAL_SETTINGS: GameSettings = { sfxVolume: 0.7, }; -export const SAVE_VERSION = 4; +export const SAVE_VERSION = 5; diff --git a/packages/shared/src/types/infrastructure.ts b/packages/shared/src/types/infrastructure.ts index 8b5bfec..1a0be53 100644 --- a/packages/shared/src/types/infrastructure.ts +++ b/packages/shared/src/types/infrastructure.ts @@ -75,12 +75,17 @@ export interface DataCenter { retrofitState: RetrofitState | null; coolingLevel: number; redundancyLevel: number; + coolingType: CoolingType; + networkFabric: NetworkFabric; effectiveComputeRacks: number; usedSlots: number; usedPowerKW: number; energyCostPerTick: number; maintenanceCostPerTick: number; currentUptime: number; + dcTrainingFlops: number; + dcInferenceFlops: number; + dcTotalVramGB: number; } // --- Network Topology (6-Tier Clos) --- @@ -141,13 +146,46 @@ export interface ClusterNetworkSummary { crossCampusBandwidth: number; } +// --- Cooling, Interconnect & Vendor Types --- + +export type CoolingType = 'air' | 'liquid' | 'immersion'; +export type GpuVendor = 'nvidia' | 'amd' | 'custom'; +export type IntraNodeInterconnect = + | 'pcie-gen4' | 'pcie-gen5' + | 'nvlink-3' | 'nvlink-4' | 'nvlink-5' | 'nvlink-domain' + | 'infinity-fabric' | 'custom-mesh'; +export type NetworkFabric = + | 'ethernet-100g' | 'ethernet-400g' + | 'infiniband-ndr' | 'infiniband-xdr'; + +export interface CoolingTypeConfig { + name: string; + upgradeCost: Record; + upgradeTimeTicks: number; + pueMultiplier: number; +} + +export interface NetworkFabricConfig { + name: string; + upgradeCost: Record; + upgradeTimeTicks: number; + trainingScalingBonus: number; +} + // --- Racks --- export type RackSkuId = + // Startup | 'consumer-x4' | 't4-x4' | 't4-x8' - | 'a100-x4' | 'a100-x8' - | 'h100-x4' | 'h100-x8' - | 'b200-x4' | 'b200-x8' | 'custom-x8'; + // Scaleup + | 'a100-pcie-x4' | 'a100-sxm-x8' | 'mi250x-x8' | 'l4-x8' + // Big Tech + | 'h100-pcie-x4' | 'h100-sxm-x8' | 'h200-sxm-x8' + | 'mi300x-x8' | 'l40s-x8' | 'b100-x8' + // AGI + | 'b200-sxm-x8' | 'gb200-nvl72' + | 'mi325x-x8' + | 'custom-training-x8' | 'custom-inference-x16'; export type PipelineStage = | 'ordered' | 'manufacturing' | 'receiving' @@ -164,11 +202,19 @@ export interface RackSkuConfig { id: RackSkuId; name: string; era: Era; + gpuVendor: GpuVendor; + gpuModel: string; gpuCount: number; - flopsPerRack: number; + trainingFlops: number; + inferenceFlops: number; + vramPerGpuGB: number; + totalVramGB: number; + requiredCooling: CoolingType; + intraNodeInterconnect: IntraNodeInterconnect; + intraNodeBandwidthGBps: number; powerDrawKW: number; baseCost: number; - requiredResearch: string | null; + requiredResearch: string[]; pipelineTimeTicks: PipelineTimings; testFailureRate: number; productionFailureRate: number; @@ -218,6 +264,9 @@ export interface InfrastructureState { clusters: Cluster[]; switchRegistry: Record; totalFlops: number; + totalTrainingFlops: number; + totalInferenceFlops: number; + totalVramGB: number; totalUptime: number; totalRackCount: number; totalComputeRackCount: number; @@ -229,6 +278,9 @@ export const INITIAL_INFRASTRUCTURE: InfrastructureState = { clusters: [], switchRegistry: {}, totalFlops: 0, + totalTrainingFlops: 0, + totalInferenceFlops: 0, + totalVramGB: 0, totalUptime: 1, totalRackCount: 0, totalComputeRackCount: 0,