Files
AIHostingTycoon/packages/game-engine/src/systems/infrastructureSystem.ts
T
josh 00e790591e
CI / build-and-push (push) Successful in 32s
Game balance audit: wire research effects, rework capability formula, fix dead systems
- Create researchBonuses utility to aggregate tech tree effects into all game systems
  (infrastructure energy costs, compute efficiency, training speed, model capability, reputation)
- Rework model capability from sqrt(compute) to 4-pillar formula (params + compute + data + research)
- Make context window affect benchmarks and inference speed
- Add MoE tradeoffs: 1.5x VRAM, 0.8x training speed
- Enforce research point costs as a gate for unlocking research
- Add real consequences to data contamination events (reputation hit, legal costs)
- Scale talent costs from $0.03 to $5/tick per headcount
- Scale compliance costs 100x to be meaningful
- Rework competitor acquisition: cheaper but grants headcount, RP, and reputation
- Remove dead code: sfxVolume, autoSaveInterval, notificationsEnabled,
  FAST_FORWARD_BATCH_SIZE, CHINCHILLA_OPTIMAL_RATIO

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-25 09:36:31 -04:00

850 lines
32 KiB
TypeScript

import type {
GameState, InfrastructureState, Cluster, Campus, DataCenter,
DeploymentCohort, PipelineStage, RackSkuId, NetworkSwitch,
SwitchTier, DCNetworkSummary, CampusNetworkSummary, ClusterNetworkSummary,
CampusRetrofitQueue, DCTier, IntraNodeInterconnect, NetworkFabric, RackSkuConfig,
} from '@ai-tycoon/shared';
import {
LOCATION_CONFIGS,
RACK_SKU_CONFIGS,
DC_TIER_CONFIGS,
BASE_ENERGY_COST_PER_FLOP,
BASE_MAINTENANCE_PER_RACK,
COOLING_FAILURE_REDUCTION,
REDUNDANCY_FAILURE_REDUCTION,
RACK_REPAIR_BASE_TICKS,
COHORT_SCALE_FACTOR,
PIPELINE_ORDER_BASE_TICKS,
SWITCH_TIER_CONFIGS,
T3_COUNT_PER_DC_TIER,
SWITCH_REPAIR_COST_FRACTION,
NETWORK_DEGRADATION,
COOLING_TYPE_CONFIGS,
NETWORK_FABRIC_CONFIGS,
estimateNetworkSlots,
} from '@ai-tycoon/shared';
import type { TickNotification } from '../tick';
import type { ResearchBonuses } from './researchBonuses';
/**
 * Result of one infrastructure tick, as returned by `processInfrastructure`.
 */
export interface InfraTickResult {
/** Infrastructure state after the tick: clusters, switch registry and fleet-wide totals. */
infrastructure: InfrastructureState;
/** Notifications raised while processing the tick (construction finished, switch failures, retrofits). */
notifications: TickNotification[];
/** Repair spend accrued this tick: network switch repairs plus rack repairs across all data centers. */
repairCosts: number;
}
// --- Pipeline helpers ---
/** Stages a cohort walks through, in order, on its way to production. */
const PIPELINE_ADVANCE_ORDER: PipelineStage[] = [
  'ordered',
  'manufacturing',
  'receiving',
  'installation',
  'testing',
];

/**
 * Returns the stage that follows `stage` in the deployment pipeline.
 *
 * The final pipeline stage, and any stage that is not part of the pipeline
 * order at all, both resolve to `'production'`.
 */
function nextStage(stage: PipelineStage): PipelineStage | 'production' {
  const position = PIPELINE_ADVANCE_ORDER.indexOf(stage);
  const hasSuccessor = position !== -1 && position !== PIPELINE_ADVANCE_ORDER.length - 1;
  return hasSuccessor ? PIPELINE_ADVANCE_ORDER[position + 1] : 'production';
}
/**
 * Number of ticks a cohort of `count` racks of SKU `skuId` needs to finish `stage`.
 *
 * The stage's base duration is scaled by `1 + COHORT_SCALE_FACTOR * count` and
 * rounded up. Stages with no base duration (`'network-down'` and anything
 * unrecognised) yield 0.
 */
function cohortStageTotal(stage: PipelineStage, skuId: string, count: number): number {
  // The SKU timings are read before the stage is inspected, exactly as before.
  const { pipelineTimeTicks } = RACK_SKU_CONFIGS[skuId as keyof typeof RACK_SKU_CONFIGS];

  const baseTicks = (): number => {
    if (stage === 'ordered') return PIPELINE_ORDER_BASE_TICKS;
    if (stage === 'manufacturing') return pipelineTimeTicks.manufacturing;
    if (stage === 'receiving') return pipelineTimeTicks.receiving;
    if (stage === 'installation') return pipelineTimeTicks.installation;
    if (stage === 'testing') return pipelineTimeTicks.testing;
    if (stage === 'repair') return RACK_REPAIR_BASE_TICKS;
    // Decommissioning takes as long as installing.
    if (stage === 'decommission') return pipelineTimeTicks.installation;
    return 0;
  };

  const cohortScale = 1 + COHORT_SCALE_FACTOR * count;
  return Math.ceil(baseTicks() * cohortScale);
}
/**
 * Progress added per tick to a cohort in `stage`.
 *
 * - manufacturing is boosted by engineering effectiveness (10% per point)
 * - installation / testing / decommission are boosted by operations effectiveness (10% per point)
 * - repair is boosted by operations effectiveness (5% per point)
 * - network-down never progresses
 * - every other stage advances at the baseline of 1
 */
function stageSpeed(stage: PipelineStage, engEff: number, opsEff: number): number {
  if (stage === 'network-down') return 0;
  if (stage === 'manufacturing') return 1 + engEff * 0.1;
  if (stage === 'repair') return 1 + opsEff * 0.05;

  const isOpsDriven =
    stage === 'installation' || stage === 'testing' || stage === 'decommission';
  return isOpsDriven ? 1 + opsEff * 0.1 : 1;
}
/**
 * Approximates the number of successes in `n` trials with success chance `p`.
 *
 * This is stochastic rounding of the expectation rather than a true binomial
 * draw: the result is `floor(n * p)`, plus one with probability equal to the
 * fractional part of `n * p`. Non-positive `n` or `p` yields 0; `p >= 1` yields `n`.
 */
function binomialSample(n: number, p: number): number {
  if (n <= 0 || p <= 0) return 0;
  if (p >= 1) return n;

  const mean = n * p;
  const whole = Math.floor(mean);
  const remainder = mean - whole;
  const roundsUp = Math.random() < remainder;
  return roundsUp ? whole + 1 : whole;
}
// --- Network Topology Construction ---
/** Sequence number for switches created since this module was loaded. */
let switchIdCounter = 0;

/**
 * Token generated once per module load and embedded in every switch ID.
 *
 * `switchIdCounter` restarts at 0 whenever the module is (re)loaded, while the
 * registry these IDs key into is carried in game state. Without a per-load
 * token, a registry that outlives the module instance would receive freshly
 * minted IDs that already exist and silently overwrite those switches.
 * NOTE(review): assumes game state can be restored across loads — confirm
 * against the persistence layer.
 */
const switchIdSession = `s${Date.now().toString(36)}${Math.floor(Math.random() * 0x100000).toString(36)}`;

/**
 * Creates a healthy, unwired switch of the given tier.
 *
 * The ID has the form `<tier>-<scope>-<session>-<seq>`, where scope is the
 * first non-null of `dcId`, `campusId`, `clusterId` (or `'x'` when all are null).
 * Uplink counts are initialised from the tier's config; callers wire the
 * actual links afterwards.
 *
 * @param tier Switch tier; selects the entry of `SWITCH_TIER_CONFIGS`.
 * @param dcId Owning data center, if any.
 * @param campusId Owning campus, if any.
 * @param clusterId Owning cluster, if any.
 * @returns A new switch with empty uplink/downlink lists and full bandwidth.
 */
function createSwitch(
  tier: SwitchTier,
  dcId: string | null,
  campusId: string | null,
  clusterId: string | null,
): NetworkSwitch {
  const config = SWITCH_TIER_CONFIGS[tier];
  const scope = dcId ?? campusId ?? clusterId ?? 'x';
  return {
    id: `${tier}-${scope}-${switchIdSession}-${switchIdCounter++}`,
    tier,
    status: 'healthy',
    dcId, campusId, clusterId,
    uplinkIds: [],
    downlinkIds: [],
    activeUplinks: config.uplinkCount,
    totalUplinks: config.uplinkCount,
    effectiveBandwidth: 1.0,
    repairProgress: 0,
    repairTotal: 0,
  };
}
/**
 * Connects `child` to `parents` with `count` uplinks, cycling through the
 * parents round-robin.
 *
 * Each link is appended to the child's `uplinkIds` (so a parent can appear more
 * than once), while a parent's `downlinkIds` lists the child at most once.
 * Afterwards the child reports `count` active uplinks and full bandwidth.
 * With no parents the child is left untouched.
 */
function wireUplinks(child: NetworkSwitch, parents: NetworkSwitch[], count: number): void {
  const parentCount = parents.length;
  if (parentCount === 0) return;

  let link = 0;
  while (link < count) {
    const target = parents[link % parentCount];
    child.uplinkIds.push(target.id);
    const alreadyListed = target.downlinkIds.includes(child.id);
    if (!alreadyListed) target.downlinkIds.push(child.id);
    link += 1;
  }

  child.activeUplinks = count;
  child.effectiveBandwidth = 1.0;
}
/**
 * Fresh network summary for a data center that has no switches:
 * zero counts and full (1.0) bandwidth / FLOPS fraction.
 */
export function emptyDCNetworkSummary(): DCNetworkSummary {
  const blank: DCNetworkSummary = {
    switchIds: [],
    networkRackCount: 0,
    totalByTier: {},
    healthyByTier: {},
    racksDisconnected: 0,
    racksDegraded: 0,
    averageBandwidth: 1,
    effectiveFlopsFraction: 1,
  };
  return blank;
}
/** Fresh campus-level network summary: no tier-4 switches, full cross-DC bandwidth. */
export function emptyCampusNetworkSummary(): CampusNetworkSummary {
  const blank: CampusNetworkSummary = {
    switchIds: [],
    totalT4: 0,
    healthyT4: 0,
    crossDCBandwidth: 1,
  };
  return blank;
}
/** Fresh cluster-level network summary: no tier-5 switches, full cross-campus bandwidth. */
export function emptyClusterNetworkSummary(): ClusterNetworkSummary {
  const blank: ClusterNetworkSummary = {
    switchIds: [],
    totalT5: 0,
    healthyT5: 0,
    crossCampusBandwidth: 1,
  };
  return blank;
}
/**
 * Builds a data center's switch hierarchy from scratch and registers every
 * switch in `registry`.
 *
 * Switches are created top-down: tier-3, then tier-2 (uplinked to tier-3),
 * then tier-1 (uplinked to tier-2), then one ToR switch per compute rack.
 * Each ToR gets two uplinks: to its tier-1 group and the following group when
 * at least two distinct tier-1 switches exist, otherwise both to the same
 * tier-1 switch.
 *
 * @param computeRackCount Number of compute racks to connect; `<= 0` yields an empty summary.
 * @param dcTier Data center tier; selects the tier-3 count and feeds the network slot estimate.
 * @param dcId ID of the data center that owns the switches.
 * @param registry Switch registry, mutated in place with the new switches.
 * @returns The summary computed over all switches created here.
 */
export function buildDCTopology(
  computeRackCount: number,
  dcTier: DCTier,
  dcId: string,
  registry: Record<string, NetworkSwitch>,
): DCNetworkSummary {
  if (computeRackCount <= 0) return emptyDCNetworkSummary();

  const createdIds: string[] = [];
  const track = (sw: NetworkSwitch): NetworkSwitch => {
    registry[sw.id] = sw;
    createdIds.push(sw.id);
    return sw;
  };

  // Tier-3 switches sit at the top of the DC and therefore carry no uplinks.
  const cores: NetworkSwitch[] = [];
  const coreCount = T3_COUNT_PER_DC_TIER[dcTier];
  for (let n = 0; n < coreCount; n++) {
    const core = createSwitch('t3', dcId, null, null);
    core.totalUplinks = 0;
    core.activeUplinks = 0;
    cores.push(track(core));
  }

  const aggregationCount = Math.ceil(computeRackCount / SWITCH_TIER_CONFIGS.t1.fanOut);
  const spineCount = Math.ceil(aggregationCount / SWITCH_TIER_CONFIGS.t2.fanOut);

  const spines: NetworkSwitch[] = [];
  for (let n = 0; n < spineCount; n++) {
    const spine = createSwitch('t2', dcId, null, null);
    wireUplinks(spine, cores, SWITCH_TIER_CONFIGS.t2.uplinkCount);
    spines.push(track(spine));
  }

  const aggregation: NetworkSwitch[] = [];
  for (let n = 0; n < aggregationCount; n++) {
    const agg = createSwitch('t1', dcId, null, null);
    wireUplinks(agg, spines, SWITCH_TIER_CONFIGS.t1.uplinkCount);
    aggregation.push(track(agg));
  }

  for (let rack = 0; rack < computeRackCount; rack++) {
    const tor = createSwitch('tor', dcId, null, null);
    const group = Math.floor(rack / SWITCH_TIER_CONFIGS.t1.fanOut);
    const primary = aggregation[group];
    const alternate = aggregation[(group + 1) % aggregation.length];
    const dualHomed = aggregation.length >= 2 && primary !== alternate;
    wireUplinks(tor, dualHomed ? [primary, alternate] : [primary], 2);
    track(tor);
  }

  const networkRackCount = estimateNetworkSlots(computeRackCount, dcTier);
  return buildDCSummary(createdIds, networkRackCount, registry);
}
/**
 * Grows an existing data center topology by `newRackCount` ToR switches,
 * adding tier-2 and tier-1 switches first when the larger rack count needs them.
 *
 * The current ToR count is taken from `existing.totalByTier.tor`; new ToRs
 * continue the index sequence from there when choosing their tier-1 group.
 *
 * @param existing Summary of the topology to extend; returned unchanged when `newRackCount <= 0`.
 * @param newRackCount Number of racks (ToR switches) to add.
 * @param dcTier Data center tier, used for the network slot estimate.
 * @param dcId ID of the data center that owns the switches.
 * @param registry Switch registry, mutated in place with the new switches.
 * @returns A summary covering the existing and the newly created switches.
 */
export function expandDCTopology(
  existing: DCNetworkSummary,
  newRackCount: number,
  dcTier: DCTier,
  dcId: string,
  registry: Record<string, NetworkSwitch>,
): DCNetworkSummary {
  if (newRackCount <= 0) return existing;

  const torsBefore = existing.totalByTier?.tor ?? 0;
  const torsAfter = torsBefore + newRackCount;

  // Existing switches of one tier that are still present in the registry.
  const existingOfTier = (tier: SwitchTier): NetworkSwitch[] => {
    const matches: NetworkSwitch[] = [];
    for (const id of existing.switchIds) {
      const sw = registry[id];
      if (sw && sw.tier === tier) matches.push(sw);
    }
    return matches;
  };
  const aggregation = existingOfTier('t1');
  const spines = existingOfTier('t2');
  const cores = existingOfTier('t3');

  const allIds = existing.switchIds.slice();
  const track = (sw: NetworkSwitch): void => {
    registry[sw.id] = sw;
    allIds.push(sw.id);
  };

  const aggregationNeeded = Math.ceil(torsAfter / SWITCH_TIER_CONFIGS.t1.fanOut);
  const spinesNeeded = Math.ceil(aggregationNeeded / SWITCH_TIER_CONFIGS.t2.fanOut);

  while (spines.length < spinesNeeded) {
    const spine = createSwitch('t2', dcId, null, null);
    wireUplinks(spine, cores, SWITCH_TIER_CONFIGS.t2.uplinkCount);
    spines.push(spine);
    track(spine);
  }

  while (aggregation.length < aggregationNeeded) {
    const agg = createSwitch('t1', dcId, null, null);
    wireUplinks(agg, spines, SWITCH_TIER_CONFIGS.t1.uplinkCount);
    aggregation.push(agg);
    track(agg);
  }

  for (let added = 0; added < newRackCount; added++) {
    const tor = createSwitch('tor', dcId, null, null);
    const group = Math.floor((torsBefore + added) / SWITCH_TIER_CONFIGS.t1.fanOut);
    const primary = aggregation[group];
    const alternate = aggregation[(group + 1) % aggregation.length];
    const dualHomed = aggregation.length >= 2 && primary !== alternate;
    wireUplinks(tor, dualHomed ? [primary, alternate] : [primary], 2);
    track(tor);
  }

  const networkRackCount = estimateNetworkSlots(torsAfter, dcTier);
  return buildDCSummary(allIds, networkRackCount, registry);
}
/**
 * Removes the last `removeCount` ToR switches (in `existing.switchIds` order)
 * from the topology and from `registry`.
 *
 * Each removed ToR is also dropped from the `downlinkIds` of its parents.
 * Higher-tier switches are never removed here.
 *
 * @param existing Summary of the topology to shrink; returned unchanged when `removeCount <= 0`.
 * @param removeCount Number of ToR switches to remove.
 * @param dcTier Data center tier, used for the network slot estimate.
 * @param registry Switch registry, mutated in place.
 * @returns A summary over the remaining switches.
 */
export function shrinkDCTopology(
  existing: DCNetworkSummary,
  removeCount: number,
  dcTier: DCTier,
  registry: Record<string, NetworkSwitch>,
): DCNetworkSummary {
  if (removeCount <= 0) return existing;

  const isTor = (id: string): boolean => registry[id]?.tier === 'tor';
  const doomed = new Set(existing.switchIds.filter(isTor).slice(-removeCount));

  doomed.forEach(torId => {
    const tor = registry[torId];
    if (!tor) return;
    // Detach from every parent before dropping the switch itself.
    for (const parentId of tor.uplinkIds) {
      const parent = registry[parentId];
      if (parent) parent.downlinkIds = parent.downlinkIds.filter(id => id !== torId);
    }
    delete registry[torId];
  });

  const survivors = existing.switchIds.filter(id => !doomed.has(id));
  const torsLeft = survivors.filter(isTor).length;
  return buildDCSummary(survivors, estimateNetworkSlots(torsLeft, dcTier), registry);
}
/**
 * Bandwidth fraction available to the rack behind `tor`, in the range implied
 * by the switches' `activeUplinks / totalUplinks` ratios.
 *
 * Starting at the ToR, the walk moves upward one layer at a time through
 * healthy switches only. The result is the smallest uplink ratio seen on the
 * ToR and on any healthy switch reached; a switch with no uplinks counts as 1.
 * An unhealthy ToR, or any ratio of 0 along the way, yields 0.
 * The switches' `effectiveBandwidth` field is not consulted.
 */
function computeRackBandwidth(tor: NetworkSwitch, registry: Record<string, NetworkSwitch>): number {
  const uplinkRatio = (sw: NetworkSwitch): number =>
    sw.totalUplinks > 0 ? sw.activeUplinks / sw.totalUplinks : 1;
  const isHealthy = (id: string): boolean => registry[id]?.status === 'healthy';

  if (tor.status !== 'healthy') return 0;

  let bottleneck = uplinkRatio(tor);
  if (bottleneck === 0) return 0;

  const seen = new Set<string>();
  let frontier = tor.uplinkIds.filter(isHealthy);
  while (frontier.length > 0) {
    const upcoming: string[] = [];
    let layerRatio = 1;
    for (const id of frontier) {
      if (seen.has(id)) continue;
      seen.add(id);
      const sw = registry[id];
      if (!sw || sw.status !== 'healthy') continue;
      layerRatio = Math.min(layerRatio, uplinkRatio(sw));
      upcoming.push(...sw.uplinkIds.filter(isHealthy));
    }
    bottleneck = Math.min(bottleneck, layerRatio);
    if (bottleneck === 0) return 0;
    frontier = upcoming;
  }
  return bottleneck;
}
/**
 * Summarises the switches listed in `switchIds` for one data center.
 *
 * Counts total and healthy switches per tier, and for every ToR switch
 * evaluates its rack bandwidth: 0 counts as disconnected, anything below 1 as
 * degraded. The average ToR bandwidth (1 when there are no ToRs) is reported
 * as both `averageBandwidth` and `effectiveFlopsFraction`. IDs missing from
 * the registry are ignored in the counts but kept in the returned `switchIds`.
 */
function buildDCSummary(
  switchIds: string[],
  networkRackCount: number,
  registry: Record<string, NetworkSwitch>,
): DCNetworkSummary {
  const totalByTier: Partial<Record<SwitchTier, number>> = {};
  const healthyByTier: Partial<Record<SwitchTier, number>> = {};
  const bump = (tally: Partial<Record<SwitchTier, number>>, tier: SwitchTier): void => {
    tally[tier] = (tally[tier] ?? 0) + 1;
  };

  const torBandwidths: number[] = [];
  for (const id of switchIds) {
    const sw = registry[id];
    if (!sw) continue;
    bump(totalByTier, sw.tier);
    if (sw.status === 'healthy') bump(healthyByTier, sw.tier);
    if (sw.tier === 'tor') torBandwidths.push(computeRackBandwidth(sw, registry));
  }

  const racksDisconnected = torBandwidths.filter(bw => bw === 0).length;
  const racksDegraded = torBandwidths.filter(bw => bw !== 0 && bw < 1).length;
  const bandwidthSum = torBandwidths.reduce((sum, bw) => sum + bw, 0);
  const averageBandwidth = torBandwidths.length > 0 ? bandwidthSum / torBandwidths.length : 1;

  return {
    switchIds,
    networkRackCount,
    totalByTier,
    healthyByTier,
    racksDisconnected,
    racksDegraded,
    averageBandwidth,
    effectiveFlopsFraction: averageBandwidth,
  };
}
// --- Network Tick (failure rolls + repair) ---
/**
 * Runs one tick of failure rolls and repairs over every switch in `registry`,
 * mutating the switches in place.
 *
 * Order of operations:
 * 1. Healthy switches are grouped by tier; already-repairing switches are collected.
 * 2. Per tier, a number of failures is drawn and that many distinct healthy
 *    switches are picked uniformly at random and put into repair. The repair
 *    cost is charged immediately.
 * 3. Switches that were repairing at the start of the tick advance and may
 *    return to healthy. Switches that failed this tick do not advance yet.
 * 4. If anything changed, `activeUplinks` / `effectiveBandwidth` are
 *    recomputed for every switch that has uplinks.
 * 5. Notifications are emitted for new tier-2, tier-3 and tier-4 failures.
 *
 * @param registry Switch registry; switches are mutated in place.
 * @param networkResearchBonus Fractional reduction applied to each tier's failure rate.
 * @param opsEff Operations effectiveness; adds 5% repair progress per point.
 * @param repairSpeedBonus Fractional reduction of the base repair time.
 * @param hotStandbyTicks When positive, replaces the computed repair time outright.
 * @param redundancyBonus Extra uplinks credited when computing `effectiveBandwidth`.
 * @returns Repair costs charged, notifications raised, and whether any switch changed status.
 */
function processNetworkTick(
  registry: Record<string, NetworkSwitch>,
  networkResearchBonus: number,
  opsEff: number,
  repairSpeedBonus: number,
  hotStandbyTicks: number,
  redundancyBonus: number,
): { switchRepairCosts: number; notifications: TickNotification[]; dirty: boolean } {
  const notifications: TickNotification[] = [];
  let switchRepairCosts = 0;
  let dirty = false;

  const healthyByTier: Partial<Record<SwitchTier, NetworkSwitch[]>> = {};
  const repairing: NetworkSwitch[] = [];
  for (const sw of Object.values(registry)) {
    if (sw.status === 'healthy') {
      (healthyByTier[sw.tier] ??= []).push(sw);
    } else if (sw.status === 'repairing') {
      repairing.push(sw);
    }
  }

  const tiers: SwitchTier[] = ['tor', 't1', 't2', 't3', 't4', 't5'];
  const newlyFailed: NetworkSwitch[] = [];
  for (const tier of tiers) {
    const healthy = healthyByTier[tier];
    if (!healthy || healthy.length === 0) continue;

    const tierConfig = SWITCH_TIER_CONFIGS[tier];
    const rate = tierConfig.failureRatePerTick * (1 - networkResearchBonus);
    const count = Math.min(healthy.length, binomialSample(healthy.length, rate));
    if (count <= 0) continue;

    // NOTE(review): hot standby overrides the repair time even when the
    // research-reduced base time would be shorter — confirm that is intended.
    const repairTime = hotStandbyTicks > 0
      ? hotStandbyTicks
      : tierConfig.repairBaseTicks * (1 - repairSpeedBonus);

    // Partial Fisher–Yates: draws `count` distinct victims uniformly. Sorting
    // with a random comparator is biased and its result is engine-dependent.
    const pool = [...healthy];
    for (let i = 0; i < count; i++) {
      const j = i + Math.floor(Math.random() * (pool.length - i));
      [pool[i], pool[j]] = [pool[j], pool[i]];
      const sw = pool[i];
      sw.status = 'repairing';
      sw.repairProgress = 0;
      sw.repairTotal = repairTime;
      newlyFailed.push(sw);
      switchRepairCosts += tierConfig.baseCost * SWITCH_REPAIR_COST_FRACTION;
    }
    dirty = true;
  }

  // Only switches that were already repairing when the tick began advance here.
  for (const sw of repairing) {
    sw.repairProgress += 1 + opsEff * 0.05;
    if (sw.repairProgress >= sw.repairTotal) {
      sw.status = 'healthy';
      sw.repairProgress = 0;
      sw.repairTotal = 0;
      dirty = true;
    }
  }

  if (dirty) {
    for (const sw of Object.values(registry)) {
      if (sw.uplinkIds.length === 0) continue;
      let active = 0;
      for (const upId of sw.uplinkIds) {
        if (registry[upId]?.status === 'healthy') active++;
      }
      sw.activeUplinks = active;
      sw.effectiveBandwidth = sw.totalUplinks > 0 ? Math.min(1, (active + redundancyBonus) / sw.totalUplinks) : 1;
    }
  }

  for (const sw of newlyFailed) {
    if (sw.tier === 't3') {
      notifications.push({ title: 'Core Network Failure', message: `Tier-3 core switch failed — potential DC disconnect!`, type: 'danger' });
    } else if (sw.tier === 't4') {
      notifications.push({ title: 'Campus Network Failure', message: `Tier-4 campus switch failed — cross-DC degradation!`, type: 'danger' });
    } else if (sw.tier === 't2') {
      notifications.push({ title: 'Network Switch Failure', message: `Tier-2 spine switch failed — racks may be degraded.`, type: 'warning' });
    }
  }

  return { switchRepairCosts, notifications, dirty };
}
// --- Interconnect Training Multiplier ---
/** Training-scaling bonus contributed by each intra-node interconnect type. */
const INTRA_NODE_BONUS: Record<IntraNodeInterconnect, number> = {
  'pcie-gen4': 0,
  'pcie-gen5': 0.05,
  'nvlink-3': 0.15,
  'nvlink-4': 0.25,
  'nvlink-5': 0.35,
  'nvlink-domain': 0.5,
  'infinity-fabric': 0.1,
  'custom-mesh': 0.4,
};

/**
 * Training throughput multiplier for a group of racks.
 *
 * A single rack (or none) always scales at 1.0. Otherwise the multiplier is
 * 0.6 plus the SKU's intra-node bonus plus the fabric's training scaling
 * bonus, capped at 1.0. An interconnect missing from the bonus table
 * contributes 0.
 */
function computeInterconnectMultiplier(
  sku: RackSkuConfig,
  rackCount: number,
  fabric: NetworkFabric,
): number {
  if (rackCount <= 1) return 1.0;

  const intraBonus = INTRA_NODE_BONUS[sku.intraNodeInterconnect] ?? 0;
  const { trainingScalingBonus } = NETWORK_FABRIC_CONFIGS[fabric];
  const uncapped = 0.6 + intraBonus + trainingScalingBonus;
  return uncapped > 1.0 ? 1.0 : uncapped;
}
// --- Main Infrastructure Tick ---
/**
 * Advances the whole infrastructure hierarchy (clusters → campuses → data centers) by one tick.
 *
 * Per tick this:
 * - rolls network switch failures/repairs on a copy of the switch registry,
 * - advances construction timers for clusters, campuses and data centers,
 * - advances data center retrofits and the per-campus retrofit queue,
 * - moves deployment cohorts through the pipeline, rolling test and production failures,
 * - keeps each data center's switch topology in step with its online rack count,
 * - recomputes per-DC costs, uptime and FLOPS and accumulates fleet-wide totals.
 *
 * A cluster, campus or data center that is still constructing only has its timer advanced;
 * nothing beneath it is processed and it contributes nothing to the totals. The same holds
 * for a data center that is retrofitting.
 *
 * @param state Current game state; talent effectiveness, completed research and infrastructure are read.
 * @param researchBonuses Optional aggregated research effects. Only `pipelineSpeedBonus` and
 *   `energyCostReduction` are read here; each is treated as 0 when absent.
 * @returns The updated infrastructure, the notifications raised, and the repair costs accrued this tick.
 */
export function processInfrastructure(state: GameState, researchBonuses?: ResearchBonuses): InfraTickResult {
const notifications: TickNotification[] = [];
let repairCosts = 0;
const engEff = state.talent.departments.engineering.effectiveness;
const opsEff = state.talent.departments.operations.effectiveness;
// Research-derived modifiers, looked up directly by completed research ID.
const qaResearchBonus = state.research.completedResearch.includes('quality-assurance') ? 0.25 : 0;
const netResearch1 = state.research.completedResearch.includes('network-engineering-i') ? 0.4 : 0;
const netResearch2 = state.research.completedResearch.includes('network-engineering-ii') ? 0.5 : 0;
// The two network research levels stack, but the combined failure-rate reduction is capped at 80%.
const networkResearchBonus = Math.min(0.8, netResearch1 + netResearch2);
const repairSpeedBonus = state.research.completedResearch.includes('network-fast-repair') ? 0.4 : 0;
const hotStandbyTicks = state.research.completedResearch.includes('network-hot-standby') ? 5 : 0;
const redundancyBonus = state.research.completedResearch.includes('network-redundancy') ? 1 : 0;
// Clone switch registry for mutable operations this tick
const registry: Record<string, NetworkSwitch> = {};
for (const [id, sw] of Object.entries(state.infrastructure.switchRegistry)) {
registry[id] = { ...sw, uplinkIds: [...sw.uplinkIds], downlinkIds: [...sw.downlinkIds] };
}
// Process network failures/repairs globally
const netResult = processNetworkTick(registry, networkResearchBonus, opsEff, repairSpeedBonus, hotStandbyTicks, redundancyBonus);
repairCosts += netResult.switchRepairCosts;
notifications.push(...netResult.notifications);
// Fleet-wide accumulators, filled in by the data center pass below.
let totalFlops = 0;
let totalTrainingFlops = 0;
let totalInferenceFlops = 0;
let totalVramGB = 0;
let totalUptime = 0;
let totalRackCount = 0;
let totalComputeRackCount = 0;
let totalDataCenterCount = 0;
let dcWithRacks = 0;
let globalLatencyPenalty = 0;
let latencyDCCount = 0;
const clusters: Cluster[] = state.infrastructure.clusters.map(cluster => {
// A constructing cluster only advances its timer; its campuses are not processed.
if (cluster.status === 'constructing') {
const newProgress = cluster.constructionProgress + 1;
if (newProgress >= cluster.constructionTotal) {
notifications.push({
title: 'Cluster Online',
message: `${cluster.name} cluster in ${LOCATION_CONFIGS[cluster.locationId].name} is now operational!`,
type: 'success',
});
return { ...cluster, constructionProgress: cluster.constructionTotal, status: 'operational' as const };
}
return { ...cluster, constructionProgress: newProgress };
}
const campuses: Campus[] = cluster.campuses.map(campus => {
// Same rule one level down: a constructing campus skips its data centers.
if (campus.status === 'constructing') {
const newProgress = campus.constructionProgress + 1;
if (newProgress >= campus.constructionTotal) {
notifications.push({ title: 'Campus Ready', message: `Campus ${campus.name} is now operational!`, type: 'success' });
return { ...campus, constructionProgress: campus.constructionTotal, status: 'operational' as const };
}
return { ...campus, constructionProgress: newProgress };
}
const dataCenters: DataCenter[] = campus.dataCenters.map(dc => {
if (dc.status === 'constructing') {
const newProgress = dc.constructionProgress + 1;
if (newProgress >= dc.constructionTotal) {
notifications.push({ title: 'Data Center Online', message: `${dc.name} is now operational!`, type: 'success' });
return { ...dc, constructionProgress: dc.constructionTotal, status: 'operational' as const };
}
return { ...dc, constructionProgress: newProgress };
}
let computeRacksOnline = dc.computeRacksOnline;
let dcRepairCosts = 0;
// Process retrofit
// Every branch below returns early, so a retrofitting DC never reaches the cohort,
// failure or aggregate code further down.
if (dc.status === 'retrofitting' && dc.retrofitState) {
const rs = { ...dc.retrofitState };
rs.progress += (1 + opsEff * 0.1);
if (rs.progress >= rs.total) {
if (rs.phase === 'decommissioning') {
// Decommissioning finished: switch the DC to the new SKU and start the installing phase.
const installTotal = cohortStageTotal('installation', rs.toSkuId, rs.racksRemaining);
// Clear DC topology on retrofit
for (const sid of dc.networkSummary.switchIds) delete registry[sid];
return {
...dc,
computeRacksOnline: 0,
computeRacksFailed: 0,
rackSkuId: rs.toSkuId,
// NOTE(review): this cohort is not advanced while the DC is retrofitting (early return
// below), so it starts its own installation stage from 0 once the DC is operational
// again — confirm the installation time is meant to be served twice.
deploymentCohorts: [{
id: `retrofit-${dc.id}-${Date.now()}`,
count: rs.racksRemaining,
skuId: rs.toSkuId,
stage: 'installation' as PipelineStage,
stageProgress: 0,
stageTotal: installTotal,
repairCount: 0,
}],
retrofitState: { ...rs, phase: 'installing' as const, progress: 0, total: installTotal },
networkSummary: emptyDCNetworkSummary(),
effectiveComputeRacks: 0,
usedSlots: 0, usedPowerKW: 0, currentUptime: 0,
energyCostPerTick: DC_TIER_CONFIGS[dc.tier].baseEnergyCostPerTick * LOCATION_CONFIGS[cluster.locationId].energyCostMultiplier,
maintenanceCostPerTick: 0,
};
} else {
// Any other phase finishing ends the retrofit.
notifications.push({ title: 'Retrofit Complete', message: `${dc.name} retrofit to ${RACK_SKU_CONFIGS[rs.toSkuId].name} is complete!`, type: 'success' });
return { ...dc, status: 'operational' as const, retrofitState: null };
}
}
return { ...dc, retrofitState: rs };
}
// Process deployment cohorts
const updatedCohorts: DeploymentCohort[] = [];
let racksJustOnlined = 0;
for (const cohort of dc.deploymentCohorts) {
// network-down cohorts don't progress via speed — handled separately below
if (cohort.stage === 'network-down') {
updatedCohorts.push(cohort);
continue;
}
const baseSpeed = stageSpeed(cohort.stage, engEff, opsEff);
// The research pipeline bonus applies to every stage except repair.
const pipelineBonus = cohort.stage !== 'repair' ? (researchBonuses?.pipelineSpeedBonus ?? 0) : 0;
const speed = baseSpeed * (1 + pipelineBonus);
const newProgress = cohort.stageProgress + speed;
if (newProgress < cohort.stageTotal) {
updatedCohorts.push({ ...cohort, stageProgress: newProgress });
continue;
}
// Stage finished. A completed decommission cohort is simply dropped.
if (cohort.stage === 'decommission') continue;
// Repaired racks go back through testing before they can come online.
if (cohort.stage === 'repair') {
const testTotal = cohortStageTotal('testing', cohort.skuId, cohort.count);
updatedCohorts.push({ ...cohort, stage: 'testing', stageProgress: 0, stageTotal: testTotal });
continue;
}
const next = nextStage(cohort.stage);
if (next === 'production') {
// Final test roll: cooling level, operations effectiveness and QA research each reduce the failure rate.
const sku = RACK_SKU_CONFIGS[cohort.skuId];
const effectiveFailRate = sku.testFailureRate
* (1 - dc.coolingLevel * COOLING_FAILURE_REDUCTION)
* (1 - opsEff * 0.2)
* (1 - qaResearchBonus);
const failed = binomialSample(cohort.count, effectiveFailRate);
const passed = cohort.count - failed;
racksJustOnlined += passed;
if (failed > 0) {
const repairCost = sku.baseCost * sku.repairCostFraction * failed;
dcRepairCosts += repairCost;
updatedCohorts.push({
id: `repair-${cohort.id}`,
count: failed, skuId: cohort.skuId,
stage: 'repair', stageProgress: 0,
stageTotal: cohortStageTotal('repair', cohort.skuId, failed),
repairCount: cohort.repairCount + 1,
});
}
} else {
const total = cohortStageTotal(next, cohort.skuId, cohort.count);
updatedCohorts.push({ ...cohort, stage: next, stageProgress: 0, stageTotal: total });
}
}
computeRacksOnline += racksJustOnlined;
// Expand topology for newly onlined racks
let networkSummary = dc.networkSummary;
if (racksJustOnlined > 0) {
if (networkSummary.switchIds.length === 0) {
networkSummary = buildDCTopology(computeRacksOnline, dc.tier, dc.id, registry);
} else {
networkSummary = expandDCTopology(networkSummary, racksJustOnlined, dc.tier, dc.id, registry);
}
}
// Production failures
// Cooling and redundancy levels reduce the per-tick failure rate of online racks.
if (computeRacksOnline > 0 && dc.rackSkuId) {
const sku = RACK_SKU_CONFIGS[dc.rackSkuId];
const effectiveRate = sku.productionFailureRate
* (1 - dc.coolingLevel * COOLING_FAILURE_REDUCTION)
* (1 - dc.redundancyLevel * REDUNDANCY_FAILURE_REDUCTION);
const prodFailures = binomialSample(computeRacksOnline, effectiveRate);
if (prodFailures > 0) {
computeRacksOnline -= prodFailures;
dcRepairCosts += sku.baseCost * sku.repairCostFraction * prodFailures;
// NOTE(review): Date.now() has millisecond resolution, so two cohorts created for the same
// DC within one millisecond (e.g. ticks processed back-to-back) would share an id —
// confirm cohort ids do not need to be unique.
updatedCohorts.push({
id: `prodfail-${dc.id}-${Date.now()}`,
count: prodFailures, skuId: dc.rackSkuId,
stage: 'repair', stageProgress: 0,
stageTotal: cohortStageTotal('repair', dc.rackSkuId, prodFailures),
repairCount: 0,
});
// Failed racks leave the topology; shrinkDCTopology removes the most recently listed ToR switches.
networkSummary = shrinkDCTopology(networkSummary, prodFailures, dc.tier, registry);
}
}
repairCosts += dcRepairCosts;
// Recompute DC network summary after failures/repairs
if (netResult.dirty && networkSummary.switchIds.length > 0) {
networkSummary = buildDCSummary(
networkSummary.switchIds, networkSummary.networkRackCount, registry,
);
}
// Rackdown: detect recovery (previously disconnected racks now have connectivity)
// Recovered racks are taken offline and sent back through testing as a new cohort.
const prevDisconnected = dc.networkSummary.racksDisconnected;
const currDisconnected = networkSummary.racksDisconnected;
if (currDisconnected < prevDisconnected && dc.rackSkuId) {
const recovered = prevDisconnected - currDisconnected;
computeRacksOnline -= recovered;
networkSummary = shrinkDCTopology(networkSummary, recovered, dc.tier, registry);
updatedCohorts.push({
id: `netrecovery-${dc.id}-${Date.now()}`,
count: recovered, skuId: dc.rackSkuId,
stage: 'testing', stageProgress: 0,
stageTotal: cohortStageTotal('testing', dc.rackSkuId, recovered),
repairCount: 0,
});
// Recompute summary after shrink
networkSummary = buildDCSummary(
networkSummary.switchIds, networkSummary.networkRackCount, registry,
);
}
// Compute DC aggregates
// Online racks that are network-disconnected do not count as effective compute.
const effectiveComputeRacks = Math.max(0,
computeRacksOnline - networkSummary.racksDisconnected);
const location = LOCATION_CONFIGS[cluster.locationId];
const tierConfig = DC_TIER_CONFIGS[dc.tier];
const pipelineRacks = updatedCohorts
.filter(c => c.stage !== 'decommission')
.reduce((sum, c) => sum + c.count, 0);
const computeRacksFailed = updatedCohorts
.filter(c => c.stage === 'repair')
.reduce((sum, c) => sum + c.count, 0);
const totalRacksInDc = computeRacksOnline + pipelineRacks;
const netSlots = networkSummary.networkRackCount;
const usedSlots = computeRacksOnline + pipelineRacks + netSlots;
let usedPowerKW = 0;
let dcFlops = 0;
let dcTrainingFlops = 0;
let dcInferenceFlops = 0;
let dcTotalVramGB = 0;
if (dc.rackSkuId && computeRacksOnline > 0) {
const sku = RACK_SKU_CONFIGS[dc.rackSkuId];
// Power draw and VRAM count every online rack; FLOPS count only effective racks,
// scaled by the network bandwidth fraction (and, for training, the interconnect multiplier).
usedPowerKW = computeRacksOnline * sku.powerDrawKW;
const bwFraction = networkSummary.effectiveFlopsFraction;
const interconnectMult = computeInterconnectMultiplier(sku, effectiveComputeRacks, dc.networkFabric);
dcTrainingFlops = effectiveComputeRacks * sku.trainingFlops * bwFraction * interconnectMult;
dcInferenceFlops = effectiveComputeRacks * sku.inferenceFlops * bwFraction;
dcTotalVramGB = computeRacksOnline * sku.totalVramGB;
dcFlops = dcTrainingFlops + dcInferenceFlops;
}
const pue = COOLING_TYPE_CONFIGS[dc.coolingType].pueMultiplier;
const energyReduction = researchBonuses?.energyCostReduction ?? 0;
// NOTE(review): the power draw in kW is multiplied by a constant named "per FLOP" — confirm the intended unit.
const energyCostPerTick = (tierConfig.baseEnergyCostPerTick + usedPowerKW * BASE_ENERGY_COST_PER_FLOP)
* location.energyCostMultiplier * pue * (1 - energyReduction);
const maintenanceCostPerTick = totalRacksInDc * BASE_MAINTENANCE_PER_RACK;
// Uptime is the share of all racks (online plus in pipeline) that are effective; an empty DC reports 1.
const currentUptime = totalRacksInDc > 0 ? effectiveComputeRacks / totalRacksInDc : 1;
// Latency penalty from bandwidth degradation
if (networkSummary.averageBandwidth < 1 && computeRacksOnline > 0) {
const penalty = (1 - networkSummary.averageBandwidth) * NETWORK_DEGRADATION.bandwidthToLatencyPenalty;
globalLatencyPenalty += penalty;
latencyDCCount++;
}
totalFlops += dcFlops;
totalTrainingFlops += dcTrainingFlops;
totalInferenceFlops += dcInferenceFlops;
totalVramGB += dcTotalVramGB;
totalRackCount += totalRacksInDc + netSlots;
totalComputeRackCount += totalRacksInDc;
totalDataCenterCount++;
if (totalRacksInDc > 0) { totalUptime += currentUptime; dcWithRacks++; }
return {
...dc,
computeRacksOnline, computeRacksFailed,
deploymentCohorts: updatedCohorts,
networkSummary, effectiveComputeRacks,
usedSlots, usedPowerKW, energyCostPerTick, maintenanceCostPerTick, currentUptime,
dcTrainingFlops, dcInferenceFlops, dcTotalVramGB,
};
});
// Process campus retrofit queue
let finalDCs = dataCenters;
let updatedQueue: CampusRetrofitQueue | null = campus.retrofitQueue ?? null;
if (updatedQueue && updatedQueue.pendingDCIds.length + updatedQueue.activeDCIds.length > 0) {
updatedQueue = { ...updatedQueue };
// An active DC that is operational again is treated as having finished its retrofit.
const newlyCompleted = finalDCs.filter(
dc => updatedQueue!.activeDCIds.includes(dc.id) && dc.status === 'operational',
);
if (newlyCompleted.length > 0) {
updatedQueue.activeDCIds = updatedQueue.activeDCIds.filter(
id => !newlyCompleted.some(dc => dc.id === id),
);
updatedQueue.completedDCIds = [...updatedQueue.completedDCIds, ...newlyCompleted.map(dc => dc.id)];
}
// Start as many pending DCs as the concurrency limit allows.
const slotsAvailable = updatedQueue.maxConcurrent - updatedQueue.activeDCIds.length;
if (slotsAvailable > 0 && updatedQueue.pendingDCIds.length > 0) {
const toStart = updatedQueue.pendingDCIds.slice(0, slotsAvailable);
updatedQueue.pendingDCIds = updatedQueue.pendingDCIds.slice(toStart.length);
updatedQueue.activeDCIds = [...updatedQueue.activeDCIds, ...toStart];
finalDCs = finalDCs.map(dc => {
if (!toStart.includes(dc.id)) return dc;
// NOTE(review): a DC skipped by the guards below stays in activeDCIds unchanged and is
// counted as completed once it is operational — confirm that skipping the retrofit is intended.
if (dc.status !== 'operational' || !dc.rackSkuId) return dc;
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0);
const totalRacks = dc.computeRacksOnline + pipelineCount;
if (totalRacks <= 0) return dc;
const oldSku = RACK_SKU_CONFIGS[dc.rackSkuId as RackSkuId];
// Decommission time uses the old SKU's installation time, scaled by rack count.
const decommTicks = Math.ceil(oldSku.pipelineTimeTicks.installation * (1 + COHORT_SCALE_FACTOR * totalRacks));
// Clear topology on retrofit start
for (const sid of dc.networkSummary.switchIds) delete registry[sid];
return {
...dc,
status: 'retrofitting' as const,
deploymentCohorts: [],
networkSummary: emptyDCNetworkSummary(),
retrofitState: {
fromSkuId: dc.rackSkuId as RackSkuId,
toSkuId: updatedQueue!.targetSkuId,
phase: 'decommissioning' as const,
progress: 0, total: decommTicks, racksRemaining: totalRacks,
},
};
});
}
// Nothing pending and nothing active: the queue is done and is cleared.
if (updatedQueue.pendingDCIds.length === 0 && updatedQueue.activeDCIds.length === 0) {
notifications.push({
title: 'Campus Retrofit Complete',
message: `All DCs in ${campus.name} have been retrofitted to ${RACK_SKU_CONFIGS[updatedQueue.targetSkuId].name}!`,
type: 'success',
});
updatedQueue = null;
}
}
return { ...campus, dataCenters: finalDCs, retrofitQueue: updatedQueue };
});
return { ...cluster, campuses };
});
// The latency penalty is averaged over only the DCs that reported degraded bandwidth.
const avgLatencyPenalty = latencyDCCount > 0 ? globalLatencyPenalty / latencyDCCount : 0;
return {
infrastructure: {
clusters,
switchRegistry: registry,
totalFlops,
totalTrainingFlops,
totalInferenceFlops,
totalVramGB,
// Mean uptime across DCs that hold at least one rack; 1 when there are none.
totalUptime: dcWithRacks > 0 ? totalUptime / dcWithRacks : 1,
totalRackCount,
totalComputeRackCount,
totalDataCenterCount,
networkLatencyPenalty: avgLatencyPenalty,
},
notifications,
repairCosts,
};
}