Overhaul rack system with split FLOPS, VRAM, cooling, interconnect, and multi-vendor SKUs
CI / build-and-push (push) Successful in 29s
CI / build-and-push (push) Successful in 29s
Expand from 10 to 18 rack SKUs across NVIDIA, AMD, and custom ASIC vendors, each with distinct training vs inference FLOPS, VRAM capacity, cooling requirements, and interconnect technology. Adds cooling hierarchy (air/liquid/immersion) that gates rack deployment, VRAM requirements that gate model training by generation, interconnect multipliers for distributed training scaling, and PUE-based energy cost reduction for advanced cooling. Includes save migration from v4 to v5, 6 new research nodes, and UI updates showing split compute stats.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -62,9 +62,14 @@ export function StateInspectionTab() {
|
||||
|
||||
<Section title="Compute">
|
||||
<Stat label="Total FLOPS" value={formatFlops(compute.totalFlops)} />
|
||||
<Stat label="Training FLOPS" value={formatFlops(compute.totalTrainingFlops)} />
|
||||
<Stat label="Inference FLOPS" value={formatFlops(compute.totalInferenceFlops)} />
|
||||
<Stat label="Eff. Training" value={formatFlops(compute.effectiveTrainingFlops)} />
|
||||
<Stat label="Eff. Inference" value={formatFlops(compute.effectiveInferenceFlops)} />
|
||||
<Stat label="VRAM" value={`${formatNumber(compute.totalVramGB)} GB`} />
|
||||
<Stat label="Utilization" value={formatPercent(compute.inferenceUtilization)} />
|
||||
<Stat label="Training" value={formatPercent(compute.trainingAllocation)} />
|
||||
<Stat label="Inference" value={formatPercent(compute.inferenceAllocation)} />
|
||||
<Stat label="Training Alloc" value={formatPercent(compute.trainingAllocation)} />
|
||||
<Stat label="Inference Alloc" value={formatPercent(compute.inferenceAllocation)} />
|
||||
<Stat label="Capacity" value={`${formatNumber(compute.tokensPerSecondCapacity)} tok/s`} />
|
||||
<Stat label="Demand" value={`${formatNumber(compute.tokensPerSecondDemand)} tok/s`} />
|
||||
</Section>
|
||||
@@ -85,6 +90,9 @@ export function StateInspectionTab() {
|
||||
<Stat label="Racks Failed" value={totalFailedRacks} />
|
||||
<Stat label="In Pipeline" value={pipelineRacks} />
|
||||
<Stat label="Total FLOPS" value={formatFlops(infrastructure.totalFlops)} />
|
||||
<Stat label="Training FLOPS" value={formatFlops(infrastructure.totalTrainingFlops)} />
|
||||
<Stat label="Inference FLOPS" value={formatFlops(infrastructure.totalInferenceFlops)} />
|
||||
<Stat label="Total VRAM" value={`${formatNumber(infrastructure.totalVramGB)} GB`} />
|
||||
</Section>
|
||||
|
||||
<Section title="Reputation">
|
||||
|
||||
@@ -17,6 +17,7 @@ import {
|
||||
estimateNetworkSlots, maxComputeRacks,
|
||||
SWITCH_TIER_CONFIGS,
|
||||
DC_UPGRADE_COST_FRACTION, DC_UPGRADE_INCREMENT,
|
||||
skuTotalFlops,
|
||||
} from '@ai-tycoon/shared';
|
||||
import type {
|
||||
DCTier, RackSkuId, LocationId, PipelineStage, Era,
|
||||
@@ -357,7 +358,7 @@ function ClusterFillAllModal({ cluster, money, era, research, onConfirm, onClose
|
||||
}) {
|
||||
const availableSkus = Object.values(RACK_SKU_CONFIGS).filter(s => {
|
||||
if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false;
|
||||
if (s.requiredResearch && !research.includes(s.requiredResearch)) return false;
|
||||
if (s.requiredResearch.length > 0 && !s.requiredResearch.every(r => research.includes(r))) return false;
|
||||
return true;
|
||||
});
|
||||
|
||||
@@ -540,7 +541,7 @@ function ClusterDetailView({ clusterId }: { clusterId: string }) {
|
||||
<div><span className="text-surface-400">FLOPS:</span> <span className="font-mono">{
|
||||
formatNumber(campus.dataCenters.reduce((s, d) => {
|
||||
const sku = d.rackSkuId ? RACK_SKU_CONFIGS[d.rackSkuId] : null;
|
||||
return s + (sku ? d.effectiveComputeRacks * sku.flopsPerRack : 0);
|
||||
return s + (sku ? d.effectiveComputeRacks * skuTotalFlops(sku) : 0);
|
||||
}, 0))
|
||||
}</span></div>
|
||||
</div>
|
||||
@@ -644,7 +645,7 @@ function FillAllDCsModal({ campus, money, era, research, onConfirm, onClose }: {
|
||||
}) {
|
||||
const availableSkus = Object.values(RACK_SKU_CONFIGS).filter(s => {
|
||||
if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false;
|
||||
if (s.requiredResearch && !research.includes(s.requiredResearch)) return false;
|
||||
if (s.requiredResearch.length > 0 && !s.requiredResearch.every(r => research.includes(r))) return false;
|
||||
return true;
|
||||
});
|
||||
|
||||
@@ -754,7 +755,7 @@ function RetrofitCampusModal({ campus, era, research, onConfirm, onClose }: {
|
||||
|
||||
const targetSkus = Object.values(RACK_SKU_CONFIGS).filter(s => {
|
||||
if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false;
|
||||
if (s.requiredResearch && !research.includes(s.requiredResearch)) return false;
|
||||
if (s.requiredResearch.length > 0 && !s.requiredResearch.every(r => research.includes(r))) return false;
|
||||
return true;
|
||||
});
|
||||
|
||||
@@ -810,7 +811,7 @@ function RetrofitCampusModal({ campus, era, research, onConfirm, onClose }: {
|
||||
}`}>
|
||||
<div>
|
||||
<div className="font-medium">{s.name}</div>
|
||||
<div className="text-xs text-surface-400">{s.flopsPerRack} FLOPS | {s.powerDrawKW} kW | {formatMoney(s.baseCost)}/rack</div>
|
||||
<div className="text-xs text-surface-400">{s.trainingFlops}T / {s.inferenceFlops}I FLOPS | {s.totalVramGB}GB | {s.powerDrawKW} kW | {formatMoney(s.baseCost)}/rack</div>
|
||||
</div>
|
||||
{isCurrentOnly && <span className="text-xs text-surface-500">Current</span>}
|
||||
{selectedSku === s.id && <CheckCircle size={16} className="text-violet-400" />}
|
||||
@@ -1140,7 +1141,7 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: {
|
||||
|
||||
const availableSkus = Object.values(RACK_SKU_CONFIGS).filter(s => {
|
||||
if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false;
|
||||
if (s.requiredResearch && !research.includes(s.requiredResearch)) return false;
|
||||
if (s.requiredResearch.length > 0 && !s.requiredResearch.every(r => research.includes(r))) return false;
|
||||
if (dc.rackSkuId && dc.rackSkuId !== s.id) return false;
|
||||
return true;
|
||||
});
|
||||
@@ -1168,9 +1169,10 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: {
|
||||
</div>
|
||||
|
||||
{/* Stats Grid */}
|
||||
<div className="grid grid-cols-4 gap-3">
|
||||
<div className="grid grid-cols-5 gap-3">
|
||||
<FleetStat icon={Cpu} label="Online" value={formatNumber(dc.computeRacksOnline)} sub={`of ${maxCompute} max compute`} />
|
||||
<FleetStat icon={Zap} label="FLOPS" value={formatNumber(sku ? dc.effectiveComputeRacks * sku.flopsPerRack : 0)} />
|
||||
<FleetStat icon={Zap} label="FLOPS" value={`${formatNumber(dc.dcTrainingFlops)}T / ${formatNumber(dc.dcInferenceFlops)}I`} />
|
||||
<FleetStat icon={HardDrive} label="VRAM" value={`${formatNumber(dc.dcTotalVramGB)} GB`} />
|
||||
<FleetStat icon={Activity} label="Uptime" value={formatPercent(dc.currentUptime)} />
|
||||
<FleetStat icon={DollarSign} label="Cost/s" value={formatMoney(dc.energyCostPerTick + dc.maintenanceCostPerTick)} />
|
||||
</div>
|
||||
@@ -1240,7 +1242,7 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: {
|
||||
<input type="radio" name="sku" checked={selectedSku === s.id} onChange={() => setSelectedSku(s.id)} className="accent-accent" />
|
||||
<div className="flex-1">
|
||||
<div className="font-medium text-sm">{s.name}</div>
|
||||
<div className="text-xs text-surface-400">{s.flopsPerRack} FLOPS | {s.powerDrawKW} kW | {formatMoney(s.baseCost)}</div>
|
||||
<div className="text-xs text-surface-400">{s.trainingFlops}T / {s.inferenceFlops}I FLOPS | {s.totalVramGB}GB | {s.powerDrawKW} kW | {formatMoney(s.baseCost)}</div>
|
||||
</div>
|
||||
</label>
|
||||
))}
|
||||
@@ -1311,14 +1313,14 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: {
|
||||
{Object.values(RACK_SKU_CONFIGS).filter(s => {
|
||||
if (s.id === dc.rackSkuId) return false;
|
||||
if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false;
|
||||
if (s.requiredResearch && !research.includes(s.requiredResearch)) return false;
|
||||
if (s.requiredResearch.length > 0 && !s.requiredResearch.every(r => research.includes(r))) return false;
|
||||
return true;
|
||||
}).map(s => (
|
||||
<button key={s.id} onClick={() => setConfirmRetrofit(s.id)}
|
||||
className="w-full flex items-center justify-between p-3 rounded-lg border border-surface-600 hover:border-accent/50 text-left">
|
||||
<div>
|
||||
<div className="font-medium text-sm">{s.name}</div>
|
||||
<div className="text-xs text-surface-400">{s.flopsPerRack} FLOPS | {s.powerDrawKW} kW | {formatMoney(s.baseCost)}/rack</div>
|
||||
<div className="text-xs text-surface-400">{s.trainingFlops}T / {s.inferenceFlops}I FLOPS | {s.totalVramGB}GB | {s.powerDrawKW} kW | {formatMoney(s.baseCost)}/rack</div>
|
||||
</div>
|
||||
<RefreshCw size={14} className="text-surface-400" />
|
||||
</button>
|
||||
@@ -1332,6 +1334,24 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: {
|
||||
{/* Upgrades Tab */}
|
||||
{activeTab === 'upgrades' && (
|
||||
<div className="bg-surface-800 border border-surface-700 rounded-xl p-4 space-y-4">
|
||||
{/* Cooling & Network Fabric */}
|
||||
<div className="grid grid-cols-2 gap-3">
|
||||
<div className="flex items-center gap-3 p-3 border border-surface-600 rounded-lg">
|
||||
<Thermometer size={18} className="text-cyan-400" />
|
||||
<div>
|
||||
<div className="text-xs text-surface-400">Cooling Type</div>
|
||||
<div className="font-medium text-sm capitalize">{dc.coolingType}</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-3 p-3 border border-surface-600 rounded-lg">
|
||||
<Network size={18} className="text-blue-400" />
|
||||
<div>
|
||||
<div className="text-xs text-surface-400">Network Fabric</div>
|
||||
<div className="font-medium text-sm">{dc.networkFabric}</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{(['cooling', 'redundancy'] as const).map(upgrade => {
|
||||
const level = upgrade === 'cooling' ? dc.coolingLevel : dc.redundancyLevel;
|
||||
const cost = tierConfig.baseCost * DC_UPGRADE_COST_FRACTION;
|
||||
|
||||
@@ -2,7 +2,7 @@ import { useState } from 'react';
|
||||
import { Brain, Play, Rocket, Globe, SlidersHorizontal, ChevronDown, ChevronUp } from 'lucide-react';
|
||||
import { TutorialHint } from '@/components/game/TutorialHint';
|
||||
import { useGameStore } from '@/store';
|
||||
import { formatNumber, formatPercent, formatDuration } from '@ai-tycoon/shared';
|
||||
import { formatNumber, formatPercent, formatDuration, VRAM_REQUIREMENTS_BY_GENERATION } from '@ai-tycoon/shared';
|
||||
import type { TuningPreset } from '@ai-tycoon/shared';
|
||||
|
||||
export function ModelsPage() {
|
||||
@@ -10,6 +10,7 @@ export function ModelsPage() {
|
||||
const activeTraining = useGameStore((s) => s.models.activeTraining);
|
||||
const productLines = useGameStore((s) => s.models.productLines);
|
||||
const totalFlops = useGameStore((s) => s.compute.totalFlops);
|
||||
const totalVramGB = useGameStore((s) => s.compute.totalVramGB);
|
||||
const trainingAlloc = useGameStore((s) => s.compute.trainingAllocation);
|
||||
const totalData = useGameStore((s) => s.data.totalTrainingTokens);
|
||||
const startTraining = useGameStore((s) => s.startTraining);
|
||||
@@ -89,6 +90,14 @@ export function ModelsPage() {
|
||||
<div className="text-xs text-surface-500 mt-1">
|
||||
ETA: {formatDuration(activeTraining.totalTicks - activeTraining.progressTicks)}
|
||||
</div>
|
||||
{(() => {
|
||||
const reqVram = VRAM_REQUIREMENTS_BY_GENERATION[activeTraining.generation] ?? 0;
|
||||
return reqVram > 0 && totalVramGB < reqVram ? (
|
||||
<p className="text-xs text-error mt-2">
|
||||
Training stalled — requires {formatNumber(reqVram)} GB VRAM (have {formatNumber(totalVramGB)} GB). Deploy more GPU racks.
|
||||
</p>
|
||||
) : null;
|
||||
})()}
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-3">
|
||||
@@ -102,11 +111,15 @@ export function ModelsPage() {
|
||||
className="w-full bg-surface-800 border border-surface-600 rounded px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-accent/50"
|
||||
/>
|
||||
</div>
|
||||
<div className="grid grid-cols-3 gap-3 text-sm">
|
||||
<div className="grid grid-cols-4 gap-3 text-sm">
|
||||
<div className="bg-surface-800 rounded-lg p-3">
|
||||
<div className="text-xs text-surface-400">Training Compute</div>
|
||||
<div className="font-mono">{formatNumber(trainingFlops)} FLOPS</div>
|
||||
</div>
|
||||
<div className="bg-surface-800 rounded-lg p-3">
|
||||
<div className="text-xs text-surface-400">Available VRAM</div>
|
||||
<div className="font-mono">{formatNumber(totalVramGB)} GB</div>
|
||||
</div>
|
||||
<div className="bg-surface-800 rounded-lg p-3">
|
||||
<div className="text-xs text-surface-400">Training Data</div>
|
||||
<div className="font-mono">{formatNumber(totalData)} tokens</div>
|
||||
|
||||
@@ -10,6 +10,7 @@ import type {
|
||||
ActiveResearch, OwnedDataset, LocationId,
|
||||
DeploymentCohort, PipelineStage,
|
||||
CampusRetrofitQueue,
|
||||
CoolingType, NetworkFabric,
|
||||
} from '@ai-tycoon/shared';
|
||||
import type { FundingRoundType, OverloadPolicy, TuningPreset, ModelTuning } from '@ai-tycoon/shared';
|
||||
import {
|
||||
@@ -27,6 +28,7 @@ import {
|
||||
LOCATION_CONFIGS,
|
||||
estimateNetworkSlots, maxComputeRacks,
|
||||
uuid,
|
||||
COOLING_TYPE_CONFIGS, COOLING_ORDER, NETWORK_FABRIC_CONFIGS, FABRIC_ORDER,
|
||||
} from '@ai-tycoon/shared';
|
||||
import {
|
||||
emptyDCNetworkSummary, emptyCampusNetworkSummary, emptyClusterNetworkSummary,
|
||||
@@ -93,6 +95,8 @@ interface Actions {
|
||||
startCampusRetrofit: (campusId: string, targetSkuId: RackSkuId, maxConcurrent: number) => void;
|
||||
cancelCampusRetrofit: (campusId: string) => void;
|
||||
upgradeDataCenter: (dataCenterId: string, upgrade: 'cooling' | 'redundancy') => void;
|
||||
upgradeCoolingType: (dataCenterId: string, targetCooling: CoolingType) => void;
|
||||
upgradeNetworkFabric: (dataCenterId: string, targetFabric: NetworkFabric) => void;
|
||||
startTraining: (job: Omit<TrainingJob, 'progressTicks'>) => void;
|
||||
deployModel: (modelId: string) => void;
|
||||
setProductPricing: (productLineId: string, field: string, value: number) => void;
|
||||
@@ -197,6 +201,9 @@ export function computeFillForDC(
|
||||
if (dc.rackSkuId !== null && dc.rackSkuId !== skuId) return { qty: 0, cost: 0 };
|
||||
|
||||
const sku = RACK_SKU_CONFIGS[skuId];
|
||||
const coolingOk = COOLING_ORDER.indexOf(sku.requiredCooling) <= COOLING_ORDER.indexOf(dc.coolingType);
|
||||
if (!coolingOk) return { qty: 0, cost: 0 };
|
||||
|
||||
const tierConfig = DC_TIER_CONFIGS[dc.tier];
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
|
||||
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0);
|
||||
@@ -414,6 +421,11 @@ export const useGameStore = create<Store>()(
|
||||
retrofitState: null,
|
||||
coolingLevel: 0,
|
||||
redundancyLevel: 0,
|
||||
coolingType: 'air' as CoolingType,
|
||||
networkFabric: 'ethernet-100g' as NetworkFabric,
|
||||
dcTrainingFlops: 0,
|
||||
dcInferenceFlops: 0,
|
||||
dcTotalVramGB: 0,
|
||||
};
|
||||
|
||||
return {
|
||||
@@ -439,7 +451,10 @@ export const useGameStore = create<Store>()(
|
||||
const sku = RACK_SKU_CONFIGS[skuId];
|
||||
const eraOrder: Era[] = ['startup', 'scaleup', 'bigtech', 'agi'];
|
||||
if (eraOrder.indexOf(s.meta.currentEra) < eraOrder.indexOf(sku.era)) return s;
|
||||
if (sku.requiredResearch && !s.research.completedResearch.includes(sku.requiredResearch)) return s;
|
||||
if (sku.requiredResearch.length > 0 && !sku.requiredResearch.every(r => s.research.completedResearch.includes(r))) return s;
|
||||
|
||||
const coolingOk = COOLING_ORDER.indexOf(sku.requiredCooling) <= COOLING_ORDER.indexOf(dc.coolingType);
|
||||
if (!coolingOk) return s;
|
||||
|
||||
const tierConfig = DC_TIER_CONFIGS[dc.tier];
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
|
||||
@@ -532,6 +547,11 @@ export const useGameStore = create<Store>()(
|
||||
retrofitState: null,
|
||||
coolingLevel: 0,
|
||||
redundancyLevel: 0,
|
||||
coolingType: 'air' as CoolingType,
|
||||
networkFabric: 'ethernet-100g' as NetworkFabric,
|
||||
dcTrainingFlops: 0,
|
||||
dcInferenceFlops: 0,
|
||||
dcTotalVramGB: 0,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -556,7 +576,10 @@ export const useGameStore = create<Store>()(
|
||||
const sku = RACK_SKU_CONFIGS[newSkuId];
|
||||
const eraOrder: Era[] = ['startup', 'scaleup', 'bigtech', 'agi'];
|
||||
if (eraOrder.indexOf(s.meta.currentEra) < eraOrder.indexOf(sku.era)) return s;
|
||||
if (sku.requiredResearch && !s.research.completedResearch.includes(sku.requiredResearch)) return s;
|
||||
if (sku.requiredResearch.length > 0 && !sku.requiredResearch.every(r => s.research.completedResearch.includes(r))) return s;
|
||||
|
||||
const coolingOk = COOLING_ORDER.indexOf(sku.requiredCooling) <= COOLING_ORDER.indexOf(dc.coolingType);
|
||||
if (!coolingOk) return s;
|
||||
|
||||
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0);
|
||||
const totalRacksToRetrofit = dc.computeRacksOnline + pipelineCount;
|
||||
@@ -604,12 +627,14 @@ export const useGameStore = create<Store>()(
|
||||
const sku = RACK_SKU_CONFIGS[skuId];
|
||||
const eraOrder: Era[] = ['startup', 'scaleup', 'bigtech', 'agi'];
|
||||
if (eraOrder.indexOf(s.meta.currentEra) < eraOrder.indexOf(sku.era)) return s;
|
||||
if (sku.requiredResearch && !s.research.completedResearch.includes(sku.requiredResearch)) return s;
|
||||
if (sku.requiredResearch.length > 0 && !sku.requiredResearch.every(r => s.research.completedResearch.includes(r))) return s;
|
||||
|
||||
let remainingMoney = s.economy.money;
|
||||
const dcUpdates = new Map<string, DeploymentCohort>();
|
||||
|
||||
for (const dc of found.campus.dataCenters) {
|
||||
const coolingOk = COOLING_ORDER.indexOf(sku.requiredCooling) <= COOLING_ORDER.indexOf(dc.coolingType);
|
||||
if (!coolingOk) continue;
|
||||
const { qty, cost } = computeFillForDC(dc, skuId, remainingMoney);
|
||||
if (qty <= 0) continue;
|
||||
|
||||
@@ -649,7 +674,7 @@ export const useGameStore = create<Store>()(
|
||||
const sku = RACK_SKU_CONFIGS[skuId];
|
||||
const eraOrder: Era[] = ['startup', 'scaleup', 'bigtech', 'agi'];
|
||||
if (eraOrder.indexOf(s.meta.currentEra) < eraOrder.indexOf(sku.era)) return s;
|
||||
if (sku.requiredResearch && !s.research.completedResearch.includes(sku.requiredResearch)) return s;
|
||||
if (sku.requiredResearch.length > 0 && !sku.requiredResearch.every(r => s.research.completedResearch.includes(r))) return s;
|
||||
|
||||
let remainingMoney = s.economy.money;
|
||||
const allDcUpdates = new Map<string, DeploymentCohort>();
|
||||
@@ -657,6 +682,8 @@ export const useGameStore = create<Store>()(
|
||||
for (const campus of cluster.campuses) {
|
||||
if (campus.status !== 'operational') continue;
|
||||
for (const dc of campus.dataCenters) {
|
||||
const coolingOk = COOLING_ORDER.indexOf(sku.requiredCooling) <= COOLING_ORDER.indexOf(dc.coolingType);
|
||||
if (!coolingOk) continue;
|
||||
const { qty, cost } = computeFillForDC(dc, skuId, remainingMoney);
|
||||
if (qty <= 0) continue;
|
||||
|
||||
@@ -701,7 +728,7 @@ export const useGameStore = create<Store>()(
|
||||
const sku = RACK_SKU_CONFIGS[targetSkuId];
|
||||
const eraOrder: Era[] = ['startup', 'scaleup', 'bigtech', 'agi'];
|
||||
if (eraOrder.indexOf(s.meta.currentEra) < eraOrder.indexOf(sku.era)) return s;
|
||||
if (sku.requiredResearch && !s.research.completedResearch.includes(sku.requiredResearch)) return s;
|
||||
if (sku.requiredResearch.length > 0 && !sku.requiredResearch.every(r => s.research.completedResearch.includes(r))) return s;
|
||||
|
||||
const eligible: string[] = [];
|
||||
const skipped: string[] = [];
|
||||
@@ -792,6 +819,58 @@ export const useGameStore = create<Store>()(
|
||||
};
|
||||
}),
|
||||
|
||||
/**
 * Upgrade a data center to a higher cooling type.
 *
 * The action is a no-op (returns the state unchanged) when:
 *  - the data center cannot be found or is not operational,
 *  - the target is not strictly higher than the current type in COOLING_ORDER
 *    (this also rejects targets that are not listed in COOLING_ORDER at all),
 *  - the research gating the target cooling type has not been completed,
 *  - the player cannot afford the upgrade.
 *
 * On success the cost is deducted from `economy.money` and only the data
 * center's `coolingType` is changed.
 */
upgradeCoolingType: (dataCenterId, targetCooling) => set((s) => {
  const found = findDC(s.infrastructure, dataCenterId);
  if (!found) return s;
  const { dc } = found;
  if (dc.status !== 'operational') return s;

  // Only upgrades are allowed: the target must sit strictly above the current type.
  const currentIdx = COOLING_ORDER.indexOf(dc.coolingType);
  const targetIdx = COOLING_ORDER.indexOf(targetCooling);
  if (targetIdx <= currentIdx) return s;

  // Research gates
  if (targetCooling === 'liquid' && !s.research.completedResearch.includes('liquid-cooling-tech')) return s;
  if (targetCooling === 'immersion' && !s.research.completedResearch.includes('immersion-cooling-tech')) return s;

  // Cost is looked up by target cooling type and the data center's tier.
  const cost = COOLING_TYPE_CONFIGS[targetCooling].upgradeCost[dc.tier];
  if (s.economy.money < cost) return s;

  return {
    economy: { ...s.economy, money: s.economy.money - cost },
    infrastructure: updateDCInInfra(s.infrastructure, dataCenterId, (d) => ({
      ...d,
      coolingType: targetCooling,
    })),
  };
}),
|
||||
|
||||
/**
 * Upgrade a data center to a higher network fabric.
 *
 * The action is a no-op (returns the state unchanged) when:
 *  - the data center cannot be found or is not operational,
 *  - the target is not strictly higher than the current fabric in FABRIC_ORDER
 *    (this also rejects targets that are not listed in FABRIC_ORDER at all),
 *  - the target is an InfiniBand fabric and 'infiniband-networking' has not
 *    been researched,
 *  - the player cannot afford the upgrade.
 *
 * On success the cost is deducted from `economy.money` and only the data
 * center's `networkFabric` is changed.
 */
upgradeNetworkFabric: (dataCenterId, targetFabric) => set((s) => {
  const found = findDC(s.infrastructure, dataCenterId);
  if (!found) return s;
  const { dc } = found;
  if (dc.status !== 'operational') return s;

  // Only upgrades are allowed: the target must sit strictly above the current fabric.
  const currentIdx = FABRIC_ORDER.indexOf(dc.networkFabric);
  const targetIdx = FABRIC_ORDER.indexOf(targetFabric);
  if (targetIdx <= currentIdx) return s;

  // InfiniBand requires research
  if ((targetFabric === 'infiniband-ndr' || targetFabric === 'infiniband-xdr')
    && !s.research.completedResearch.includes('infiniband-networking')) return s;

  // Cost is looked up by target fabric and the data center's tier.
  const cost = NETWORK_FABRIC_CONFIGS[targetFabric].upgradeCost[dc.tier];
  if (s.economy.money < cost) return s;

  return {
    economy: { ...s.economy, money: s.economy.money - cost },
    infrastructure: updateDCInInfra(s.infrastructure, dataCenterId, (d) => ({
      ...d,
      networkFabric: targetFabric,
    })),
  };
}),
|
||||
|
||||
// --- Non-infrastructure actions (unchanged) ---
|
||||
|
||||
startTraining: (job) => set((s) => ({
|
||||
@@ -979,7 +1058,7 @@ export const useGameStore = create<Store>()(
|
||||
notifications: [{
|
||||
id: uuid(),
|
||||
title: 'Save Reset',
|
||||
message: 'Your save was reset due to a major infrastructure redesign — Hypercluster scale! Build clusters, campuses, and data centers.',
|
||||
message: 'Your save was reset due to a major rack system overhaul — 20 SKUs with training/inference specialization, VRAM, cooling tech, interconnects, and AMD/ASIC vendors!',
|
||||
type: 'info' as const,
|
||||
tick: 0,
|
||||
read: false,
|
||||
|
||||
@@ -25,7 +25,7 @@ export const TECH_TREE: ResearchNode[] = [
|
||||
{
|
||||
id: 'advanced-gpu-arch',
|
||||
name: 'Advanced GPU Architecture',
|
||||
description: 'Unlocks procurement of NVIDIA A100 rack configurations.',
|
||||
description: 'Unlocks NVIDIA A100 PCIe and SXM rack configurations.',
|
||||
era: 'startup',
|
||||
category: 'infrastructure',
|
||||
prerequisites: [],
|
||||
@@ -35,7 +35,7 @@ export const TECH_TREE: ResearchNode[] = [
|
||||
{
|
||||
id: 'next-gen-gpu',
|
||||
name: 'Next-Gen GPU Architecture',
|
||||
description: 'Unlocks procurement of NVIDIA H100 rack configurations.',
|
||||
description: 'Unlocks NVIDIA H100 and H200 rack configurations.',
|
||||
era: 'scaleup',
|
||||
category: 'infrastructure',
|
||||
prerequisites: ['advanced-gpu-arch'],
|
||||
@@ -45,7 +45,7 @@ export const TECH_TREE: ResearchNode[] = [
|
||||
{
|
||||
id: 'frontier-compute',
|
||||
name: 'Frontier Compute',
|
||||
description: 'Unlocks procurement of NVIDIA B200 rack configurations.',
|
||||
description: 'Unlocks NVIDIA B100 and B200 rack configurations.',
|
||||
era: 'bigtech',
|
||||
category: 'infrastructure',
|
||||
prerequisites: ['next-gen-gpu'],
|
||||
@@ -55,13 +55,73 @@ export const TECH_TREE: ResearchNode[] = [
|
||||
{
|
||||
id: 'custom-silicon',
|
||||
name: 'Custom Silicon Design',
|
||||
description: 'Design and fabricate custom AI ASICs for maximum efficiency.',
|
||||
description: 'Unlocks custom Training and Inference ASIC configurations.',
|
||||
era: 'agi',
|
||||
category: 'infrastructure',
|
||||
prerequisites: ['frontier-compute'],
|
||||
cost: { researchPoints: 10, compute: 500, ticks: 900 },
|
||||
effects: [{ type: 'unlock_rack', target: 'custom', value: 1 }],
|
||||
},
|
||||
{
|
||||
id: 'amd-ecosystem',
|
||||
name: 'AMD ROCm Ecosystem',
|
||||
description: 'Adapt software stack for AMD GPUs. Unlocks MI250X, MI300X, MI325X racks.',
|
||||
era: 'scaleup',
|
||||
category: 'infrastructure',
|
||||
prerequisites: ['advanced-gpu-arch'],
|
||||
cost: { researchPoints: 2, compute: 30, ticks: 200 },
|
||||
effects: [{ type: 'unlock_rack', target: 'amd', value: 1 }],
|
||||
},
|
||||
{
|
||||
id: 'inference-specialization',
|
||||
name: 'Inference Specialization',
|
||||
description: 'Optimized inference kernels unlock L4, L40S, and custom inference racks.',
|
||||
era: 'scaleup',
|
||||
category: 'infrastructure',
|
||||
prerequisites: ['quantization'],
|
||||
cost: { researchPoints: 2, compute: 20, ticks: 150 },
|
||||
effects: [{ type: 'unlock_rack', target: 'inference', value: 1 }],
|
||||
},
|
||||
{
|
||||
id: 'rack-scale-compute',
|
||||
name: 'Rack-Scale Computing',
|
||||
description: 'Full NVLink domain architecture. Unlocks GB200 NVL72 — 72 GPUs in a single rack.',
|
||||
era: 'agi',
|
||||
category: 'infrastructure',
|
||||
prerequisites: ['frontier-compute'],
|
||||
cost: { researchPoints: 8, compute: 400, ticks: 720 },
|
||||
effects: [{ type: 'unlock_rack', target: 'gb200-nvl72', value: 1 }],
|
||||
},
|
||||
{
|
||||
id: 'liquid-cooling-tech',
|
||||
name: 'Liquid Cooling Systems',
|
||||
description: 'Enables liquid cooling upgrades for data centers. Required for SXM and high-power racks.',
|
||||
era: 'scaleup',
|
||||
category: 'infrastructure',
|
||||
prerequisites: ['advanced-cooling'],
|
||||
cost: { researchPoints: 2, compute: 25, ticks: 180 },
|
||||
effects: [{ type: 'unlock_feature', target: 'liquid-cooling', value: 1 }],
|
||||
},
|
||||
{
|
||||
id: 'immersion-cooling-tech',
|
||||
name: 'Immersion Cooling',
|
||||
description: 'Enables immersion cooling for maximum power density. Required for custom ASICs.',
|
||||
era: 'bigtech',
|
||||
category: 'infrastructure',
|
||||
prerequisites: ['liquid-cooling-tech'],
|
||||
cost: { researchPoints: 5, compute: 100, ticks: 400 },
|
||||
effects: [{ type: 'unlock_feature', target: 'immersion-cooling', value: 1 }],
|
||||
},
|
||||
{
|
||||
id: 'infiniband-networking',
|
||||
name: 'InfiniBand Networking',
|
||||
description: 'High-bandwidth interconnect for distributed training. Unlocks InfiniBand fabric upgrades.',
|
||||
era: 'scaleup',
|
||||
category: 'infrastructure',
|
||||
prerequisites: ['network-engineering-i'],
|
||||
cost: { researchPoints: 3, compute: 40, ticks: 240 },
|
||||
effects: [{ type: 'unlock_feature', target: 'infiniband', value: 1 }],
|
||||
},
|
||||
{
|
||||
id: 'dc-engineering-ii',
|
||||
name: 'DC Engineering II',
|
||||
|
||||
@@ -3,19 +3,44 @@ import { FLOPS_TO_TOKENS_MULTIPLIER } from '@ai-tycoon/shared';
|
||||
|
||||
/**
 * Fleet-wide compute capacity derived from the infrastructure totals and the
 * player's training/inference allocation split.
 */
export interface CapacityResult {
  /** Sum of `totalTrainingFlops` and `totalInferenceFlops`. */
  totalFlops: number;
  /** Training FLOPS total taken from the infrastructure state. */
  totalTrainingFlops: number;
  /** Inference FLOPS total taken from the infrastructure state. */
  totalInferenceFlops: number;
  /** VRAM total (GB) taken from the infrastructure state. */
  totalVramGB: number;
  /** Fraction of compute allocated to training. */
  trainingAllocation: number;
  /** Fraction of compute allocated to inference (`1 - trainingAllocation`). */
  inferenceAllocation: number;
  /** Training FLOPS after applying the allocation, including inference hardware used for training at reduced efficiency. */
  effectiveTrainingFlops: number;
  /** Inference FLOPS after applying the allocation, including training hardware used for inference at reduced efficiency. */
  effectiveInferenceFlops: number;
  /** Token throughput capacity derived from `effectiveInferenceFlops`. */
  tokensPerSecondCapacity: number;
}
|
||||
|
||||
/**
 * Derive fleet-wide compute capacity from the infrastructure totals and the
 * player's training/inference allocation.
 *
 * Training and inference hardware can each do the other job at reduced
 * efficiency, so both effective figures blend the two hardware pools weighted
 * by the allocation.
 *
 * @param state - Game state; only `state.compute.trainingAllocation` is read.
 * @param infrastructure - Infrastructure totals (`totalTrainingFlops`,
 *   `totalInferenceFlops`, `totalVramGB`).
 * @returns The split and effective compute figures plus token throughput capacity.
 */
export function computeCapacity(state: GameState, infrastructure: InfrastructureState): CapacityResult {
  // Inference hardware can do training at ~30% efficiency (no NVLink, poor scaling)
  const inferenceHwTrainingEfficiency = 0.3;
  // Training hardware can do inference at ~50% efficiency
  const trainingHwInferenceEfficiency = 0.5;

  const { totalTrainingFlops, totalInferenceFlops, totalVramGB } = infrastructure;
  const trainingAllocation = state.compute.trainingAllocation;
  const inferenceAllocation = 1 - trainingAllocation;

  const effectiveTrainingFlops =
    totalTrainingFlops * trainingAllocation +
    totalInferenceFlops * trainingAllocation * inferenceHwTrainingEfficiency;

  const effectiveInferenceFlops =
    totalInferenceFlops * inferenceAllocation +
    totalTrainingFlops * inferenceAllocation * trainingHwInferenceEfficiency;

  // Token throughput is driven by effective inference compute only.
  const tokensPerSecondCapacity = effectiveInferenceFlops * FLOPS_TO_TOKENS_MULTIPLIER;

  return {
    // Reported total is the sum of the two split pools rather than a separately tracked figure.
    totalFlops: totalTrainingFlops + totalInferenceFlops,
    totalTrainingFlops,
    totalInferenceFlops,
    totalVramGB,
    trainingAllocation,
    inferenceAllocation,
    effectiveTrainingFlops,
    effectiveInferenceFlops,
    tokensPerSecondCapacity,
  };
}
|
||||
|
||||
export function finalizeCompute(capacity: CapacityResult, totalTokenDemand: number): ComputeState {
|
||||
|
||||
@@ -2,7 +2,7 @@ import type {
|
||||
GameState, InfrastructureState, Cluster, Campus, DataCenter,
|
||||
DeploymentCohort, PipelineStage, RackSkuId, NetworkSwitch,
|
||||
SwitchTier, DCNetworkSummary, CampusNetworkSummary, ClusterNetworkSummary,
|
||||
CampusRetrofitQueue, DCTier,
|
||||
CampusRetrofitQueue, DCTier, IntraNodeInterconnect, NetworkFabric, RackSkuConfig,
|
||||
} from '@ai-tycoon/shared';
|
||||
import {
|
||||
LOCATION_CONFIGS,
|
||||
@@ -19,6 +19,8 @@ import {
|
||||
T3_COUNT_PER_DC_TIER,
|
||||
SWITCH_REPAIR_COST_FRACTION,
|
||||
NETWORK_DEGRADATION,
|
||||
COOLING_TYPE_CONFIGS,
|
||||
NETWORK_FABRIC_CONFIGS,
|
||||
estimateNetworkSlots,
|
||||
} from '@ai-tycoon/shared';
|
||||
import type { TickNotification } from '../tick';
|
||||
@@ -435,6 +437,30 @@ function processNetworkTick(
|
||||
return { switchRepairCosts, notifications, dirty };
|
||||
}
|
||||
|
||||
// --- Interconnect Training Multiplier ---
|
||||
|
||||
/**
 * Additive training-scaling bonus contributed by each intra-node interconnect.
 * computeInterconnectMultiplier adds this to its 0.6 base (together with the
 * network fabric bonus) and caps the sum at 1.0.
 */
const INTRA_NODE_BONUS: Record<IntraNodeInterconnect, number> = {
  'pcie-gen4': 0.0,
  'pcie-gen5': 0.05,
  'nvlink-3': 0.15,
  'nvlink-4': 0.25,
  'nvlink-5': 0.35,
  'nvlink-domain': 0.50,
  'infinity-fabric': 0.10,
  'custom-mesh': 0.40,
};
|
||||
|
||||
/**
 * Scaling multiplier applied to a data center's training FLOPS.
 *
 * With at most one rack the multiplier is 1.0. Otherwise it starts from a
 * base of 0.6, adds the SKU's intra-node interconnect bonus and the network
 * fabric's `trainingScalingBonus`, and is capped at 1.0.
 *
 * @param sku - Rack SKU; only `intraNodeInterconnect` is read.
 * @param rackCount - Number of racks the multiplier is computed for.
 * @param fabric - Network fabric of the data center.
 * @returns A multiplier no greater than 1.0.
 */
function computeInterconnectMultiplier(
  sku: RackSkuConfig,
  rackCount: number,
  fabric: NetworkFabric,
): number {
  if (rackCount <= 1) return 1.0;

  // An interconnect missing from the table contributes no bonus.
  const intra = INTRA_NODE_BONUS[sku.intraNodeInterconnect] ?? 0;
  const fabricBonus = NETWORK_FABRIC_CONFIGS[fabric].trainingScalingBonus;

  return Math.min(1.0, 0.6 + intra + fabricBonus);
}
|
||||
|
||||
// --- Main Infrastructure Tick ---
|
||||
|
||||
export function processInfrastructure(state: GameState): InfraTickResult {
|
||||
@@ -463,6 +489,9 @@ export function processInfrastructure(state: GameState): InfraTickResult {
|
||||
notifications.push(...netResult.notifications);
|
||||
|
||||
let totalFlops = 0;
|
||||
let totalTrainingFlops = 0;
|
||||
let totalInferenceFlops = 0;
|
||||
let totalVramGB = 0;
|
||||
let totalUptime = 0;
|
||||
let totalRackCount = 0;
|
||||
let totalComputeRackCount = 0;
|
||||
@@ -684,14 +713,23 @@ export function processInfrastructure(state: GameState): InfraTickResult {
|
||||
|
||||
let usedPowerKW = 0;
|
||||
let dcFlops = 0;
|
||||
let dcTrainingFlops = 0;
|
||||
let dcInferenceFlops = 0;
|
||||
let dcTotalVramGB = 0;
|
||||
if (dc.rackSkuId && computeRacksOnline > 0) {
|
||||
const sku = RACK_SKU_CONFIGS[dc.rackSkuId];
|
||||
usedPowerKW = computeRacksOnline * sku.powerDrawKW;
|
||||
dcFlops = effectiveComputeRacks * sku.flopsPerRack * networkSummary.effectiveFlopsFraction;
|
||||
const bwFraction = networkSummary.effectiveFlopsFraction;
|
||||
const interconnectMult = computeInterconnectMultiplier(sku, effectiveComputeRacks, dc.networkFabric);
|
||||
dcTrainingFlops = effectiveComputeRacks * sku.trainingFlops * bwFraction * interconnectMult;
|
||||
dcInferenceFlops = effectiveComputeRacks * sku.inferenceFlops * bwFraction;
|
||||
dcTotalVramGB = computeRacksOnline * sku.totalVramGB;
|
||||
dcFlops = dcTrainingFlops + dcInferenceFlops;
|
||||
}
|
||||
|
||||
const pue = COOLING_TYPE_CONFIGS[dc.coolingType].pueMultiplier;
|
||||
const energyCostPerTick = (tierConfig.baseEnergyCostPerTick + usedPowerKW * BASE_ENERGY_COST_PER_FLOP)
|
||||
* location.energyCostMultiplier;
|
||||
* location.energyCostMultiplier * pue;
|
||||
const maintenanceCostPerTick = totalRacksInDc * BASE_MAINTENANCE_PER_RACK;
|
||||
const currentUptime = totalRacksInDc > 0 ? effectiveComputeRacks / totalRacksInDc : 1;
|
||||
|
||||
@@ -703,6 +741,9 @@ export function processInfrastructure(state: GameState): InfraTickResult {
|
||||
}
|
||||
|
||||
totalFlops += dcFlops;
|
||||
totalTrainingFlops += dcTrainingFlops;
|
||||
totalInferenceFlops += dcInferenceFlops;
|
||||
totalVramGB += dcTotalVramGB;
|
||||
totalRackCount += totalRacksInDc + netSlots;
|
||||
totalComputeRackCount += totalRacksInDc;
|
||||
totalDataCenterCount++;
|
||||
@@ -714,6 +755,7 @@ export function processInfrastructure(state: GameState): InfraTickResult {
|
||||
deploymentCohorts: updatedCohorts,
|
||||
networkSummary, effectiveComputeRacks,
|
||||
usedSlots, usedPowerKW, energyCostPerTick, maintenanceCostPerTick, currentUptime,
|
||||
dcTrainingFlops, dcInferenceFlops, dcTotalVramGB,
|
||||
};
|
||||
});
|
||||
|
||||
@@ -788,6 +830,9 @@ export function processInfrastructure(state: GameState): InfraTickResult {
|
||||
clusters,
|
||||
switchRegistry: registry,
|
||||
totalFlops,
|
||||
totalTrainingFlops,
|
||||
totalInferenceFlops,
|
||||
totalVramGB,
|
||||
totalUptime: dcWithRacks > 0 ? totalUptime / dcWithRacks : 1,
|
||||
totalRackCount,
|
||||
totalComputeRackCount,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { GameState, ModelsState, TrainedModel, ModelCapabilities } from '@ai-tycoon/shared';
|
||||
import { uuid } from '@ai-tycoon/shared';
|
||||
import { uuid, VRAM_REQUIREMENTS_BY_GENERATION } from '@ai-tycoon/shared';
|
||||
|
||||
export interface ModelTickResult {
|
||||
modelsState: ModelsState;
|
||||
@@ -12,6 +12,11 @@ export function processModels(state: GameState): ModelTickResult {
|
||||
return { modelsState: state.models, modelCompleted: null };
|
||||
}
|
||||
|
||||
const requiredVram = VRAM_REQUIREMENTS_BY_GENERATION[active.generation] ?? 0;
|
||||
if (requiredVram > 0 && state.compute.totalVramGB < requiredVram) {
|
||||
return { modelsState: state.models, modelCompleted: null };
|
||||
}
|
||||
|
||||
const researcherBoost = state.talent.departments.research.headcount *
|
||||
state.talent.departments.research.effectiveness;
|
||||
const engineerBoost = state.talent.departments.engineering.headcount *
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { DCTier, DCTierConfig, RackSkuId, RackSkuConfig, SwitchTier, SwitchTierConfig, CampusTierCost, ClusterCostConfig } from '../types/infrastructure';
|
||||
import type { DCTier, DCTierConfig, RackSkuId, RackSkuConfig, SwitchTier, SwitchTierConfig, CampusTierCost, ClusterCostConfig, CoolingType, CoolingTypeConfig, NetworkFabric, NetworkFabricConfig } from '../types/infrastructure';
|
||||
|
||||
export const TICK_INTERVAL_MS = 1000;
|
||||
export const MAX_OFFLINE_TICKS = 86_400;
|
||||
@@ -209,18 +209,87 @@ export function maxComputeRacks(totalSlots: number, dcTier: DCTier): number {
|
||||
return lo;
|
||||
}
|
||||
|
||||
// --- Cooling Type Configs ---
|
||||
|
||||
/** Air cooling: free, instant, and leaves energy cost unscaled (PUE multiplier 1.0). */
const AIR_COOLING_CONFIG: CoolingTypeConfig = {
  name: 'Air Cooling',
  upgradeCost: { small: 0, medium: 0, large: 0, mega: 0 },
  upgradeTimeTicks: 0,
  pueMultiplier: 1.0,
};

/** Liquid cooling: paid upgrade taking 300 ticks; PUE multiplier 0.85. */
const LIQUID_COOLING_CONFIG: CoolingTypeConfig = {
  name: 'Liquid Cooling',
  upgradeCost: { small: 200_000, medium: 600_000, large: 2_000_000, mega: 6_000_000 },
  upgradeTimeTicks: 300,
  pueMultiplier: 0.85,
};

/** Immersion cooling: paid upgrade taking 600 ticks; PUE multiplier 0.70. */
const IMMERSION_COOLING_CONFIG: CoolingTypeConfig = {
  name: 'Immersion Cooling',
  upgradeCost: { small: 500_000, medium: 1_500_000, large: 5_000_000, mega: 15_000_000 },
  upgradeTimeTicks: 600,
  pueMultiplier: 0.7,
};

/**
 * Static configuration for every cooling type a data center can use.
 * `upgradeCost` is keyed by data-center tier; `pueMultiplier` is the factor
 * the infrastructure tick multiplies into a data center's energy cost.
 */
export const COOLING_TYPE_CONFIGS: Record<CoolingType, CoolingTypeConfig> = {
  air: AIR_COOLING_CONFIG,
  liquid: LIQUID_COOLING_CONFIG,
  immersion: IMMERSION_COOLING_CONFIG,
};
|
||||
|
||||
/**
 * Cooling types in the sequence air, liquid, immersion.
 * NOTE(review): presumably ascending capability, used to compare or step
 * through cooling tiers — confirm against callers.
 */
export const COOLING_ORDER: CoolingType[] = [
  'air',
  'liquid',
  'immersion',
];
|
||||
|
||||
// --- Network Fabric Configs ---
|
||||
|
||||
/** 100G Ethernet: free, instant, and contributes no training-scaling bonus. */
const ETHERNET_100G_CONFIG: NetworkFabricConfig = {
  name: '100G Ethernet',
  upgradeCost: { small: 0, medium: 0, large: 0, mega: 0 },
  upgradeTimeTicks: 0,
  trainingScalingBonus: 0,
};

/** 400G Ethernet: paid upgrade taking 200 ticks; bonus 0.10. */
const ETHERNET_400G_CONFIG: NetworkFabricConfig = {
  name: '400G Ethernet',
  upgradeCost: { small: 100_000, medium: 300_000, large: 1_000_000, mega: 3_000_000 },
  upgradeTimeTicks: 200,
  trainingScalingBonus: 0.1,
};

/** InfiniBand NDR: paid upgrade taking 400 ticks; bonus 0.25. */
const INFINIBAND_NDR_CONFIG: NetworkFabricConfig = {
  name: 'InfiniBand NDR',
  upgradeCost: { small: 300_000, medium: 900_000, large: 3_000_000, mega: 9_000_000 },
  upgradeTimeTicks: 400,
  trainingScalingBonus: 0.25,
};

/** InfiniBand XDR: paid upgrade taking 600 ticks; bonus 0.40. */
const INFINIBAND_XDR_CONFIG: NetworkFabricConfig = {
  name: 'InfiniBand XDR',
  upgradeCost: { small: 800_000, medium: 2_400_000, large: 8_000_000, mega: 24_000_000 },
  upgradeTimeTicks: 600,
  trainingScalingBonus: 0.4,
};

/**
 * Static configuration for every network fabric a data center can use.
 * `upgradeCost` is keyed by data-center tier; `trainingScalingBonus` is the
 * additive term the interconnect training multiplier reads for the fabric.
 */
export const NETWORK_FABRIC_CONFIGS: Record<NetworkFabric, NetworkFabricConfig> = {
  'ethernet-100g': ETHERNET_100G_CONFIG,
  'ethernet-400g': ETHERNET_400G_CONFIG,
  'infiniband-ndr': INFINIBAND_NDR_CONFIG,
  'infiniband-xdr': INFINIBAND_XDR_CONFIG,
};
|
||||
|
||||
/**
 * Network fabrics in the sequence 100G Ethernet, 400G Ethernet,
 * InfiniBand NDR, InfiniBand XDR.
 * NOTE(review): presumably ascending capability, used to compare or step
 * through fabric tiers — confirm against callers.
 */
export const FABRIC_ORDER: NetworkFabric[] = [
  'ethernet-100g',
  'ethernet-400g',
  'infiniband-ndr',
  'infiniband-xdr',
];
|
||||
|
||||
// --- Rack SKU Configs ---
|
||||
|
||||
/**
 * Combined FLOPS figure for a rack SKU.
 *
 * @param sku Rack SKU configuration.
 * @returns The sum of the SKU's `trainingFlops` and `inferenceFlops`.
 */
export function skuTotalFlops(sku: RackSkuConfig): number {
  const { trainingFlops, inferenceFlops } = sku;
  return trainingFlops + inferenceFlops;
}
|
||||
|
||||
export const RACK_SKU_CONFIGS: Record<RackSkuId, RackSkuConfig> = {
|
||||
// === STARTUP ERA ===
|
||||
'consumer-x4': {
|
||||
id: 'consumer-x4',
|
||||
name: 'Consumer GPU x4',
|
||||
era: 'startup',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'RTX Consumer',
|
||||
gpuCount: 4,
|
||||
flopsPerRack: 4,
|
||||
trainingFlops: 2,
|
||||
inferenceFlops: 4,
|
||||
vramPerGpuGB: 12,
|
||||
totalVramGB: 48,
|
||||
requiredCooling: 'air',
|
||||
intraNodeInterconnect: 'pcie-gen4',
|
||||
intraNodeBandwidthGBps: 64,
|
||||
powerDrawKW: 0.4,
|
||||
baseCost: 3_200,
|
||||
requiredResearch: null,
|
||||
requiredResearch: [],
|
||||
pipelineTimeTicks: { manufacturing: 20, receiving: 10, installation: 15, testing: 15 },
|
||||
testFailureRate: 0.05,
|
||||
productionFailureRate: 0.0002,
|
||||
@@ -230,11 +299,19 @@ export const RACK_SKU_CONFIGS: Record<RackSkuId, RackSkuConfig> = {
|
||||
id: 't4-x4',
|
||||
name: 'NVIDIA T4 x4',
|
||||
era: 'startup',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'T4',
|
||||
gpuCount: 4,
|
||||
flopsPerRack: 32,
|
||||
powerDrawKW: 1.2,
|
||||
baseCost: 20_000,
|
||||
requiredResearch: null,
|
||||
trainingFlops: 8,
|
||||
inferenceFlops: 32,
|
||||
vramPerGpuGB: 16,
|
||||
totalVramGB: 64,
|
||||
requiredCooling: 'air',
|
||||
intraNodeInterconnect: 'pcie-gen4',
|
||||
intraNodeBandwidthGBps: 64,
|
||||
powerDrawKW: 0.5,
|
||||
baseCost: 12_000,
|
||||
requiredResearch: [],
|
||||
pipelineTimeTicks: { manufacturing: 30, receiving: 15, installation: 25, testing: 20 },
|
||||
testFailureRate: 0.07,
|
||||
productionFailureRate: 0.0003,
|
||||
@@ -243,115 +320,370 @@ export const RACK_SKU_CONFIGS: Record<RackSkuId, RackSkuConfig> = {
|
||||
't4-x8': {
|
||||
id: 't4-x8',
|
||||
name: 'NVIDIA T4 x8',
|
||||
era: 'scaleup',
|
||||
era: 'startup',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'T4',
|
||||
gpuCount: 8,
|
||||
flopsPerRack: 64,
|
||||
powerDrawKW: 2.4,
|
||||
baseCost: 38_000,
|
||||
requiredResearch: null,
|
||||
trainingFlops: 16,
|
||||
inferenceFlops: 64,
|
||||
vramPerGpuGB: 16,
|
||||
totalVramGB: 128,
|
||||
requiredCooling: 'air',
|
||||
intraNodeInterconnect: 'pcie-gen4',
|
||||
intraNodeBandwidthGBps: 64,
|
||||
powerDrawKW: 1.0,
|
||||
baseCost: 22_000,
|
||||
requiredResearch: [],
|
||||
pipelineTimeTicks: { manufacturing: 40, receiving: 20, installation: 30, testing: 30 },
|
||||
testFailureRate: 0.08,
|
||||
productionFailureRate: 0.0003,
|
||||
repairCostFraction: 0.12,
|
||||
},
|
||||
'a100-x4': {
|
||||
id: 'a100-x4',
|
||||
name: 'NVIDIA A100 x4',
|
||||
|
||||
// === SCALEUP ERA ===
|
||||
'l4-x8': {
|
||||
id: 'l4-x8',
|
||||
name: 'NVIDIA L4 x8',
|
||||
era: 'scaleup',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'L4',
|
||||
gpuCount: 8,
|
||||
trainingFlops: 30,
|
||||
inferenceFlops: 180,
|
||||
vramPerGpuGB: 24,
|
||||
totalVramGB: 192,
|
||||
requiredCooling: 'air',
|
||||
intraNodeInterconnect: 'pcie-gen5',
|
||||
intraNodeBandwidthGBps: 128,
|
||||
powerDrawKW: 0.8,
|
||||
baseCost: 28_000,
|
||||
requiredResearch: ['inference-specialization'],
|
||||
pipelineTimeTicks: { manufacturing: 35, receiving: 15, installation: 25, testing: 25 },
|
||||
testFailureRate: 0.07,
|
||||
productionFailureRate: 0.0002,
|
||||
repairCostFraction: 0.10,
|
||||
},
|
||||
'a100-pcie-x4': {
|
||||
id: 'a100-pcie-x4',
|
||||
name: 'A100 PCIe x4',
|
||||
era: 'scaleup',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'A100 PCIe 80GB',
|
||||
gpuCount: 4,
|
||||
flopsPerRack: 160,
|
||||
powerDrawKW: 4.0,
|
||||
baseCost: 60_000,
|
||||
requiredResearch: 'advanced-gpu-arch',
|
||||
trainingFlops: 100,
|
||||
inferenceFlops: 140,
|
||||
vramPerGpuGB: 80,
|
||||
totalVramGB: 320,
|
||||
requiredCooling: 'air',
|
||||
intraNodeInterconnect: 'pcie-gen4',
|
||||
intraNodeBandwidthGBps: 64,
|
||||
powerDrawKW: 2.0,
|
||||
baseCost: 55_000,
|
||||
requiredResearch: ['advanced-gpu-arch'],
|
||||
pipelineTimeTicks: { manufacturing: 60, receiving: 25, installation: 50, testing: 45 },
|
||||
testFailureRate: 0.10,
|
||||
productionFailureRate: 0.0004,
|
||||
repairCostFraction: 0.15,
|
||||
},
|
||||
'a100-x8': {
|
||||
id: 'a100-x8',
|
||||
name: 'NVIDIA A100 x8',
|
||||
'a100-sxm-x8': {
|
||||
id: 'a100-sxm-x8',
|
||||
name: 'A100 SXM x8',
|
||||
era: 'scaleup',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'A100 SXM 80GB',
|
||||
gpuCount: 8,
|
||||
flopsPerRack: 320,
|
||||
powerDrawKW: 8.0,
|
||||
trainingFlops: 320,
|
||||
inferenceFlops: 200,
|
||||
vramPerGpuGB: 80,
|
||||
totalVramGB: 640,
|
||||
requiredCooling: 'liquid',
|
||||
intraNodeInterconnect: 'nvlink-3',
|
||||
intraNodeBandwidthGBps: 600,
|
||||
powerDrawKW: 5.0,
|
||||
baseCost: 115_000,
|
||||
requiredResearch: 'advanced-gpu-arch',
|
||||
requiredResearch: ['advanced-gpu-arch'],
|
||||
pipelineTimeTicks: { manufacturing: 70, receiving: 30, installation: 55, testing: 55 },
|
||||
testFailureRate: 0.12,
|
||||
productionFailureRate: 0.0004,
|
||||
repairCostFraction: 0.15,
|
||||
},
|
||||
'h100-x4': {
|
||||
id: 'h100-x4',
|
||||
name: 'NVIDIA H100 x4',
|
||||
era: 'bigtech',
|
||||
gpuCount: 4,
|
||||
flopsPerRack: 480,
|
||||
'mi250x-x8': {
|
||||
id: 'mi250x-x8',
|
||||
name: 'AMD MI250X x8',
|
||||
era: 'scaleup',
|
||||
gpuVendor: 'amd',
|
||||
gpuModel: 'MI250X',
|
||||
gpuCount: 8,
|
||||
trainingFlops: 240,
|
||||
inferenceFlops: 160,
|
||||
vramPerGpuGB: 128,
|
||||
totalVramGB: 1024,
|
||||
requiredCooling: 'air',
|
||||
intraNodeInterconnect: 'infinity-fabric',
|
||||
intraNodeBandwidthGBps: 400,
|
||||
powerDrawKW: 5.6,
|
||||
baseCost: 140_000,
|
||||
requiredResearch: 'next-gen-gpu',
|
||||
pipelineTimeTicks: { manufacturing: 80, receiving: 30, installation: 65, testing: 65 },
|
||||
baseCost: 80_000,
|
||||
requiredResearch: ['amd-ecosystem'],
|
||||
pipelineTimeTicks: { manufacturing: 75, receiving: 30, installation: 60, testing: 60 },
|
||||
testFailureRate: 0.15,
|
||||
productionFailureRate: 0.0005,
|
||||
repairCostFraction: 0.18,
|
||||
},
|
||||
'h100-x8': {
|
||||
id: 'h100-x8',
|
||||
name: 'NVIDIA H100 x8',
|
||||
|
||||
// === BIG TECH ERA ===
|
||||
'h100-pcie-x4': {
|
||||
id: 'h100-pcie-x4',
|
||||
name: 'H100 PCIe x4',
|
||||
era: 'bigtech',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'H100 PCIe 80GB',
|
||||
gpuCount: 4,
|
||||
trainingFlops: 180,
|
||||
inferenceFlops: 480,
|
||||
vramPerGpuGB: 80,
|
||||
totalVramGB: 320,
|
||||
requiredCooling: 'air',
|
||||
intraNodeInterconnect: 'pcie-gen5',
|
||||
intraNodeBandwidthGBps: 128,
|
||||
powerDrawKW: 1.8,
|
||||
baseCost: 130_000,
|
||||
requiredResearch: ['next-gen-gpu'],
|
||||
pipelineTimeTicks: { manufacturing: 80, receiving: 30, installation: 65, testing: 65 },
|
||||
testFailureRate: 0.12,
|
||||
productionFailureRate: 0.0004,
|
||||
repairCostFraction: 0.15,
|
||||
},
|
||||
'h100-sxm-x8': {
|
||||
id: 'h100-sxm-x8',
|
||||
name: 'H100 SXM x8',
|
||||
era: 'bigtech',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'H100 SXM 80GB',
|
||||
gpuCount: 8,
|
||||
flopsPerRack: 960,
|
||||
powerDrawKW: 11.2,
|
||||
baseCost: 270_000,
|
||||
requiredResearch: 'next-gen-gpu',
|
||||
trainingFlops: 960,
|
||||
inferenceFlops: 600,
|
||||
vramPerGpuGB: 80,
|
||||
totalVramGB: 640,
|
||||
requiredCooling: 'liquid',
|
||||
intraNodeInterconnect: 'nvlink-4',
|
||||
intraNodeBandwidthGBps: 900,
|
||||
powerDrawKW: 7.0,
|
||||
baseCost: 280_000,
|
||||
requiredResearch: ['next-gen-gpu'],
|
||||
pipelineTimeTicks: { manufacturing: 90, receiving: 35, installation: 75, testing: 80 },
|
||||
testFailureRate: 0.18,
|
||||
productionFailureRate: 0.0005,
|
||||
repairCostFraction: 0.18,
|
||||
},
|
||||
'b200-x4': {
|
||||
id: 'b200-x4',
|
||||
name: 'NVIDIA B200 x4',
|
||||
'h200-sxm-x8': {
|
||||
id: 'h200-sxm-x8',
|
||||
name: 'H200 SXM x8',
|
||||
era: 'bigtech',
|
||||
gpuCount: 4,
|
||||
flopsPerRack: 1600,
|
||||
powerDrawKW: 8.0,
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'H200 SXM 141GB',
|
||||
gpuCount: 8,
|
||||
trainingFlops: 1000,
|
||||
inferenceFlops: 650,
|
||||
vramPerGpuGB: 141,
|
||||
totalVramGB: 1128,
|
||||
requiredCooling: 'liquid',
|
||||
intraNodeInterconnect: 'nvlink-4',
|
||||
intraNodeBandwidthGBps: 900,
|
||||
powerDrawKW: 7.0,
|
||||
baseCost: 340_000,
|
||||
requiredResearch: ['next-gen-gpu'],
|
||||
pipelineTimeTicks: { manufacturing: 95, receiving: 35, installation: 80, testing: 85 },
|
||||
testFailureRate: 0.18,
|
||||
productionFailureRate: 0.0005,
|
||||
repairCostFraction: 0.18,
|
||||
},
|
||||
'mi300x-x8': {
|
||||
id: 'mi300x-x8',
|
||||
name: 'AMD MI300X x8',
|
||||
era: 'bigtech',
|
||||
gpuVendor: 'amd',
|
||||
gpuModel: 'MI300X 192GB',
|
||||
gpuCount: 8,
|
||||
trainingFlops: 700,
|
||||
inferenceFlops: 450,
|
||||
vramPerGpuGB: 192,
|
||||
totalVramGB: 1536,
|
||||
requiredCooling: 'liquid',
|
||||
intraNodeInterconnect: 'infinity-fabric',
|
||||
intraNodeBandwidthGBps: 500,
|
||||
powerDrawKW: 7.5,
|
||||
baseCost: 200_000,
|
||||
requiredResearch: 'frontier-compute',
|
||||
requiredResearch: ['amd-ecosystem'],
|
||||
pipelineTimeTicks: { manufacturing: 100, receiving: 35, installation: 75, testing: 80 },
|
||||
testFailureRate: 0.20,
|
||||
productionFailureRate: 0.0006,
|
||||
repairCostFraction: 0.20,
|
||||
},
|
||||
'l40s-x8': {
|
||||
id: 'l40s-x8',
|
||||
name: 'NVIDIA L40S x8',
|
||||
era: 'bigtech',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'L40S 48GB',
|
||||
gpuCount: 8,
|
||||
trainingFlops: 120,
|
||||
inferenceFlops: 900,
|
||||
vramPerGpuGB: 48,
|
||||
totalVramGB: 384,
|
||||
requiredCooling: 'air',
|
||||
intraNodeInterconnect: 'pcie-gen5',
|
||||
intraNodeBandwidthGBps: 128,
|
||||
powerDrawKW: 3.5,
|
||||
baseCost: 160_000,
|
||||
requiredResearch: ['inference-specialization'],
|
||||
pipelineTimeTicks: { manufacturing: 70, receiving: 25, installation: 55, testing: 50 },
|
||||
testFailureRate: 0.10,
|
||||
productionFailureRate: 0.0003,
|
||||
repairCostFraction: 0.12,
|
||||
},
|
||||
'b100-x8': {
|
||||
id: 'b100-x8',
|
||||
name: 'NVIDIA B100 x8',
|
||||
era: 'bigtech',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'B100 192GB',
|
||||
gpuCount: 8,
|
||||
trainingFlops: 1800,
|
||||
inferenceFlops: 1100,
|
||||
vramPerGpuGB: 192,
|
||||
totalVramGB: 1536,
|
||||
requiredCooling: 'air',
|
||||
intraNodeInterconnect: 'nvlink-5',
|
||||
intraNodeBandwidthGBps: 1800,
|
||||
powerDrawKW: 7.0,
|
||||
baseCost: 320_000,
|
||||
requiredResearch: ['frontier-compute'],
|
||||
pipelineTimeTicks: { manufacturing: 100, receiving: 40, installation: 80, testing: 80 },
|
||||
testFailureRate: 0.20,
|
||||
productionFailureRate: 0.0006,
|
||||
repairCostFraction: 0.20,
|
||||
},
|
||||
'b200-x8': {
|
||||
id: 'b200-x8',
|
||||
name: 'NVIDIA B200 x8',
|
||||
|
||||
// === AGI ERA ===
|
||||
'b200-sxm-x8': {
|
||||
id: 'b200-sxm-x8',
|
||||
name: 'B200 SXM x8',
|
||||
era: 'agi',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'B200 SXM 192GB',
|
||||
gpuCount: 8,
|
||||
flopsPerRack: 3200,
|
||||
powerDrawKW: 16.0,
|
||||
baseCost: 380_000,
|
||||
requiredResearch: 'frontier-compute',
|
||||
trainingFlops: 3200,
|
||||
inferenceFlops: 1800,
|
||||
vramPerGpuGB: 192,
|
||||
totalVramGB: 1536,
|
||||
requiredCooling: 'liquid',
|
||||
intraNodeInterconnect: 'nvlink-5',
|
||||
intraNodeBandwidthGBps: 1800,
|
||||
powerDrawKW: 10.0,
|
||||
baseCost: 400_000,
|
||||
requiredResearch: ['frontier-compute'],
|
||||
pipelineTimeTicks: { manufacturing: 120, receiving: 45, installation: 95, testing: 100 },
|
||||
testFailureRate: 0.22,
|
||||
productionFailureRate: 0.0006,
|
||||
repairCostFraction: 0.20,
|
||||
},
|
||||
'custom-x8': {
|
||||
id: 'custom-x8',
|
||||
name: 'Custom ASIC x8',
|
||||
'gb200-nvl72': {
|
||||
id: 'gb200-nvl72',
|
||||
name: 'GB200 NVL72',
|
||||
era: 'agi',
|
||||
gpuVendor: 'nvidia',
|
||||
gpuModel: 'B200 NVL72',
|
||||
gpuCount: 72,
|
||||
trainingFlops: 36_000,
|
||||
inferenceFlops: 18_000,
|
||||
vramPerGpuGB: 192,
|
||||
totalVramGB: 13_824,
|
||||
requiredCooling: 'liquid',
|
||||
intraNodeInterconnect: 'nvlink-domain',
|
||||
intraNodeBandwidthGBps: 14_400,
|
||||
powerDrawKW: 120.0,
|
||||
baseCost: 2_500_000,
|
||||
requiredResearch: ['frontier-compute', 'rack-scale-compute'],
|
||||
pipelineTimeTicks: { manufacturing: 180, receiving: 60, installation: 120, testing: 120 },
|
||||
testFailureRate: 0.28,
|
||||
productionFailureRate: 0.0008,
|
||||
repairCostFraction: 0.15,
|
||||
},
|
||||
'mi325x-x8': {
|
||||
id: 'mi325x-x8',
|
||||
name: 'AMD MI325X x8',
|
||||
era: 'agi',
|
||||
gpuVendor: 'amd',
|
||||
gpuModel: 'MI325X 256GB',
|
||||
gpuCount: 8,
|
||||
flopsPerRack: 6400,
|
||||
trainingFlops: 2400,
|
||||
inferenceFlops: 1400,
|
||||
vramPerGpuGB: 256,
|
||||
totalVramGB: 2048,
|
||||
requiredCooling: 'liquid',
|
||||
intraNodeInterconnect: 'infinity-fabric',
|
||||
intraNodeBandwidthGBps: 600,
|
||||
powerDrawKW: 7.5,
|
||||
baseCost: 280_000,
|
||||
requiredResearch: ['amd-ecosystem'],
|
||||
pipelineTimeTicks: { manufacturing: 130, receiving: 40, installation: 90, testing: 95 },
|
||||
testFailureRate: 0.22,
|
||||
productionFailureRate: 0.0006,
|
||||
repairCostFraction: 0.20,
|
||||
},
|
||||
'custom-training-x8': {
|
||||
id: 'custom-training-x8',
|
||||
name: 'Training ASIC x8',
|
||||
era: 'agi',
|
||||
gpuVendor: 'custom',
|
||||
gpuModel: 'Custom Training ASIC',
|
||||
gpuCount: 8,
|
||||
trainingFlops: 8000,
|
||||
inferenceFlops: 2000,
|
||||
vramPerGpuGB: 256,
|
||||
totalVramGB: 2048,
|
||||
requiredCooling: 'immersion',
|
||||
intraNodeInterconnect: 'custom-mesh',
|
||||
intraNodeBandwidthGBps: 3200,
|
||||
powerDrawKW: 20.0,
|
||||
baseCost: 640_000,
|
||||
requiredResearch: 'custom-silicon',
|
||||
baseCost: 700_000,
|
||||
requiredResearch: ['custom-silicon'],
|
||||
pipelineTimeTicks: { manufacturing: 140, receiving: 50, installation: 100, testing: 110 },
|
||||
testFailureRate: 0.25,
|
||||
productionFailureRate: 0.0008,
|
||||
repairCostFraction: 0.20,
|
||||
},
|
||||
'custom-inference-x16': {
|
||||
id: 'custom-inference-x16',
|
||||
name: 'Inference ASIC x16',
|
||||
era: 'agi',
|
||||
gpuVendor: 'custom',
|
||||
gpuModel: 'Custom Inference ASIC',
|
||||
gpuCount: 16,
|
||||
trainingFlops: 800,
|
||||
inferenceFlops: 12_000,
|
||||
vramPerGpuGB: 32,
|
||||
totalVramGB: 512,
|
||||
requiredCooling: 'air',
|
||||
intraNodeInterconnect: 'custom-mesh',
|
||||
intraNodeBandwidthGBps: 1600,
|
||||
powerDrawKW: 5.0,
|
||||
baseCost: 500_000,
|
||||
requiredResearch: ['custom-silicon', 'inference-specialization'],
|
||||
pipelineTimeTicks: { manufacturing: 130, receiving: 45, installation: 90, testing: 100 },
|
||||
testFailureRate: 0.22,
|
||||
productionFailureRate: 0.0007,
|
||||
repairCostFraction: 0.18,
|
||||
},
|
||||
};
|
||||
|
||||
/**
 * Minimum total VRAM, in GB, keyed by model generation.
 *
 * The model tick compares the entry for the active model's generation with
 * the fleet's total VRAM and makes no progress while the fleet is below it.
 * Generations without an entry are treated by that lookup as having no
 * requirement.
 */
export const VRAM_REQUIREMENTS_BY_GENERATION: Record<number, number> = {
  1: 48,
  2: 192,
  3: 640,
  4: 1_536,
  5: 4_096,
  6: 16_384,
};
|
||||
|
||||
// --- Pipeline & Infrastructure Constants ---
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
export interface ComputeState {
|
||||
totalFlops: number;
|
||||
totalTrainingFlops: number;
|
||||
totalInferenceFlops: number;
|
||||
totalVramGB: number;
|
||||
trainingAllocation: number;
|
||||
inferenceAllocation: number;
|
||||
effectiveTrainingFlops: number;
|
||||
effectiveInferenceFlops: number;
|
||||
inferenceUtilization: number;
|
||||
tokensPerSecondCapacity: number;
|
||||
tokensPerSecondDemand: number;
|
||||
@@ -9,8 +14,13 @@ export interface ComputeState {
|
||||
|
||||
export const INITIAL_COMPUTE: ComputeState = {
|
||||
totalFlops: 0,
|
||||
totalTrainingFlops: 0,
|
||||
totalInferenceFlops: 0,
|
||||
totalVramGB: 0,
|
||||
trainingAllocation: 0.5,
|
||||
inferenceAllocation: 0.5,
|
||||
effectiveTrainingFlops: 0,
|
||||
effectiveInferenceFlops: 0,
|
||||
inferenceUtilization: 0,
|
||||
tokensPerSecondCapacity: 0,
|
||||
tokensPerSecondDemand: 0,
|
||||
|
||||
@@ -58,4 +58,4 @@ export const INITIAL_SETTINGS: GameSettings = {
|
||||
sfxVolume: 0.7,
|
||||
};
|
||||
|
||||
export const SAVE_VERSION = 4;
|
||||
export const SAVE_VERSION = 5;
|
||||
|
||||
@@ -75,12 +75,17 @@ export interface DataCenter {
|
||||
retrofitState: RetrofitState | null;
|
||||
coolingLevel: number;
|
||||
redundancyLevel: number;
|
||||
coolingType: CoolingType;
|
||||
networkFabric: NetworkFabric;
|
||||
effectiveComputeRacks: number;
|
||||
usedSlots: number;
|
||||
usedPowerKW: number;
|
||||
energyCostPerTick: number;
|
||||
maintenanceCostPerTick: number;
|
||||
currentUptime: number;
|
||||
dcTrainingFlops: number;
|
||||
dcInferenceFlops: number;
|
||||
dcTotalVramGB: number;
|
||||
}
|
||||
|
||||
// --- Network Topology (6-Tier Clos) ---
|
||||
@@ -141,13 +146,46 @@ export interface ClusterNetworkSummary {
|
||||
crossCampusBandwidth: number;
|
||||
}
|
||||
|
||||
// --- Cooling, Interconnect & Vendor Types ---
|
||||
|
||||
/** Cooling technology installed in a data center or required by a rack SKU. */
export type CoolingType =
  | 'air'
  | 'liquid'
  | 'immersion';
|
||||
/** Vendor of the accelerators in a rack SKU. */
export type GpuVendor =
  | 'nvidia'
  | 'amd'
  | 'custom';
|
||||
/** Interconnect technology linking the accelerators inside a single rack SKU. */
export type IntraNodeInterconnect =
  // PCIe
  | 'pcie-gen4'
  | 'pcie-gen5'
  // NVLink
  | 'nvlink-3'
  | 'nvlink-4'
  | 'nvlink-5'
  | 'nvlink-domain'
  // Other vendors
  | 'infinity-fabric'
  | 'custom-mesh';
|
||||
/** Network fabric a data center is configured with. */
export type NetworkFabric =
  // Ethernet
  | 'ethernet-100g'
  | 'ethernet-400g'
  // InfiniBand
  | 'infiniband-ndr'
  | 'infiniband-xdr';
|
||||
|
||||
/** Static description of one cooling type. */
export interface CoolingTypeConfig {
  /** Display name of the cooling type. */
  name: string;

  /** Upgrade cost, keyed by data-center tier. */
  upgradeCost: Record<DCTier, number>;

  /** Upgrade duration, in ticks. */
  upgradeTimeTicks: number;

  /** Factor multiplied into a data center's energy cost per tick. */
  pueMultiplier: number;
}
|
||||
|
||||
/** Static description of one network fabric. */
export interface NetworkFabricConfig {
  /** Display name of the fabric. */
  name: string;

  /** Upgrade cost, keyed by data-center tier. */
  upgradeCost: Record<DCTier, number>;

  /** Upgrade duration, in ticks. */
  upgradeTimeTicks: number;

  /** Additive term the interconnect training multiplier reads for this fabric. */
  trainingScalingBonus: number;
}
|
||||
|
||||
// --- Racks ---
|
||||
|
||||
export type RackSkuId =
|
||||
// Startup
|
||||
| 'consumer-x4' | 't4-x4' | 't4-x8'
|
||||
| 'a100-x4' | 'a100-x8'
|
||||
| 'h100-x4' | 'h100-x8'
|
||||
| 'b200-x4' | 'b200-x8' | 'custom-x8';
|
||||
// Scaleup
|
||||
| 'a100-pcie-x4' | 'a100-sxm-x8' | 'mi250x-x8' | 'l4-x8'
|
||||
// Big Tech
|
||||
| 'h100-pcie-x4' | 'h100-sxm-x8' | 'h200-sxm-x8'
|
||||
| 'mi300x-x8' | 'l40s-x8' | 'b100-x8'
|
||||
// AGI
|
||||
| 'b200-sxm-x8' | 'gb200-nvl72'
|
||||
| 'mi325x-x8'
|
||||
| 'custom-training-x8' | 'custom-inference-x16';
|
||||
|
||||
export type PipelineStage =
|
||||
| 'ordered' | 'manufacturing' | 'receiving'
|
||||
@@ -164,11 +202,19 @@ export interface RackSkuConfig {
|
||||
id: RackSkuId;
|
||||
name: string;
|
||||
era: Era;
|
||||
gpuVendor: GpuVendor;
|
||||
gpuModel: string;
|
||||
gpuCount: number;
|
||||
flopsPerRack: number;
|
||||
trainingFlops: number;
|
||||
inferenceFlops: number;
|
||||
vramPerGpuGB: number;
|
||||
totalVramGB: number;
|
||||
requiredCooling: CoolingType;
|
||||
intraNodeInterconnect: IntraNodeInterconnect;
|
||||
intraNodeBandwidthGBps: number;
|
||||
powerDrawKW: number;
|
||||
baseCost: number;
|
||||
requiredResearch: string | null;
|
||||
requiredResearch: string[];
|
||||
pipelineTimeTicks: PipelineTimings;
|
||||
testFailureRate: number;
|
||||
productionFailureRate: number;
|
||||
@@ -218,6 +264,9 @@ export interface InfrastructureState {
|
||||
clusters: Cluster[];
|
||||
switchRegistry: Record<string, NetworkSwitch>;
|
||||
totalFlops: number;
|
||||
totalTrainingFlops: number;
|
||||
totalInferenceFlops: number;
|
||||
totalVramGB: number;
|
||||
totalUptime: number;
|
||||
totalRackCount: number;
|
||||
totalComputeRackCount: number;
|
||||
@@ -229,6 +278,9 @@ export const INITIAL_INFRASTRUCTURE: InfrastructureState = {
|
||||
clusters: [],
|
||||
switchRegistry: {},
|
||||
totalFlops: 0,
|
||||
totalTrainingFlops: 0,
|
||||
totalInferenceFlops: 0,
|
||||
totalVramGB: 0,
|
||||
totalUptime: 1,
|
||||
totalRackCount: 0,
|
||||
totalComputeRackCount: 0,
|
||||
|
||||
Reference in New Issue
Block a user