c1cc70eeb9
Full rebrand: UI display text, package scope (@ai-tycoon/* -> @token-empire/*), localStorage keys, Docker/CI image paths, database names, and documentation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
676 lines
26 KiB
TypeScript
676 lines
26 KiB
TypeScript
import type {
|
|
GameState, InfrastructureState, Cluster, Campus, DataCenter,
|
|
DeploymentCohort, PipelineStage, RackSkuId,
|
|
SwitchTier, DCNetworkSummary, CampusNetworkSummary, ClusterNetworkSummary,
|
|
RepairBatch, CampusRetrofitQueue, DCTier, IntraNodeInterconnect, NetworkFabric, RackSkuConfig,
|
|
} from '@token-empire/shared';
|
|
import {
|
|
LOCATION_CONFIGS,
|
|
RACK_SKU_CONFIGS,
|
|
DC_TIER_CONFIGS,
|
|
BASE_ENERGY_COST_PER_FLOP,
|
|
BASE_MAINTENANCE_PER_RACK,
|
|
COOLING_FAILURE_REDUCTION,
|
|
REDUNDANCY_FAILURE_REDUCTION,
|
|
RACK_REPAIR_BASE_TICKS,
|
|
COHORT_SCALE_FACTOR,
|
|
PIPELINE_ORDER_BASE_TICKS,
|
|
SWITCH_TIER_CONFIGS,
|
|
T3_COUNT_PER_DC_TIER,
|
|
SWITCH_REPAIR_COST_FRACTION,
|
|
NETWORK_DEGRADATION,
|
|
COOLING_TYPE_CONFIGS,
|
|
NETWORK_FABRIC_CONFIGS,
|
|
estimateNetworkSlots,
|
|
} from '@token-empire/shared';
|
|
import type { TickNotification } from '../tick';
|
|
import type { ResearchBonuses } from './researchBonuses';
|
|
|
|
/**
 * Outcome of one infrastructure tick, as produced by `processInfrastructure`.
 */
export interface InfraTickResult {
  /** Infrastructure state after the tick, including the recomputed aggregate totals. */
  infrastructure: InfrastructureState;
  /** Player-facing notifications raised while the tick was processed. */
  notifications: TickNotification[];
  /** Sum of rack repair costs and network switch repair costs incurred this tick. */
  repairCosts: number;
}
|
|
|
|
// --- Pipeline helpers ---
|
|
|
|
/** Stages a cohort advances through, in order, before its racks reach production. */
const PIPELINE_ADVANCE_ORDER: PipelineStage[] = [
  'ordered',
  'manufacturing',
  'receiving',
  'installation',
  'testing',
];

/**
 * Returns the stage that follows `stage` in the normal deployment pipeline.
 *
 * The last pipeline stage (`testing`) and any stage that is not part of
 * `PIPELINE_ADVANCE_ORDER` both resolve to `'production'`.
 */
function nextStage(stage: PipelineStage): PipelineStage | 'production' {
  const position = PIPELINE_ADVANCE_ORDER.indexOf(stage);
  const isFinalStage = position === PIPELINE_ADVANCE_ORDER.length - 1;
  return position < 0 || isFinalStage ? 'production' : PIPELINE_ADVANCE_ORDER[position + 1];
}
|
|
|
|
/**
 * Computes how many progress units a cohort of `count` racks needs to finish `stage`.
 *
 * The per-stage base duration comes from the SKU's pipeline timings or from the
 * shared order/repair constants, and is then stretched by
 * `1 + COHORT_SCALE_FACTOR * count` and rounded up. Decommissioning reuses the
 * SKU's installation time; `network-down` and unrecognised stages have a base of 0.
 *
 * @param stage Pipeline stage being entered.
 * @param skuId Key into `RACK_SKU_CONFIGS`.
 * @param count Number of racks in the cohort.
 */
function cohortStageTotal(stage: PipelineStage, skuId: string, count: number): number {
  const { pipelineTimeTicks } = RACK_SKU_CONFIGS[skuId as keyof typeof RACK_SKU_CONFIGS];

  let baseTicks = 0;
  if (stage === 'ordered') {
    baseTicks = PIPELINE_ORDER_BASE_TICKS;
  } else if (stage === 'manufacturing') {
    baseTicks = pipelineTimeTicks.manufacturing;
  } else if (stage === 'receiving') {
    baseTicks = pipelineTimeTicks.receiving;
  } else if (stage === 'installation' || stage === 'decommission') {
    baseTicks = pipelineTimeTicks.installation;
  } else if (stage === 'testing') {
    baseTicks = pipelineTimeTicks.testing;
  } else if (stage === 'repair') {
    baseTicks = RACK_REPAIR_BASE_TICKS;
  }

  const cohortScale = 1 + COHORT_SCALE_FACTOR * count;
  return Math.ceil(baseTicks * cohortScale);
}
|
|
|
|
/**
 * Progress gained per tick by a cohort in `stage`, before research bonuses.
 *
 * - `manufacturing` scales with engineering effectiveness (+10% per point).
 * - `installation`, `testing` and `decommission` scale with operations effectiveness (+10% per point).
 * - `repair` scales with operations effectiveness (+5% per point).
 * - `network-down` never progresses (speed 0).
 * - Every other stage advances at a flat rate of 1.
 */
function stageSpeed(stage: PipelineStage, engEff: number, opsEff: number): number {
  if (stage === 'network-down') return 0;
  if (stage === 'manufacturing') return 1 + engEff * 0.1;
  if (stage === 'repair') return 1 + opsEff * 0.05;

  const isOpsDriven = stage === 'installation' || stage === 'testing' || stage === 'decommission';
  return isOpsDriven ? 1 + opsEff * 0.1 : 1;
}
|
|
|
|
/**
 * Draws a failure count for `n` trials with per-trial probability `p`.
 *
 * Despite the name this is not a full binomial draw: it takes the expected
 * value `n * p`, keeps its integer part, and rounds up with probability equal
 * to the fractional part. The result is therefore always `floor(n*p)` or
 * `floor(n*p) + 1`.
 *
 * @returns 0 when `n <= 0` or `p <= 0`; `n` when `p >= 1`.
 */
function binomialSample(n: number, p: number): number {
  if (n <= 0 || p <= 0) return 0;
  if (p >= 1) return n;

  const mean = n * p;
  const whole = Math.floor(mean);
  const remainder = mean - whole;
  const roundUp = Math.random() < remainder;
  return roundUp ? whole + 1 : whole;
}
|
|
|
|
// --- Aggregate Network Model ---
|
|
|
|
/**
 * Switch tiers tracked inside a single data center's network summary.
 * Campus- and cluster-level tiers (T4/T5) live in their own summaries and are not listed here.
 */
const DC_TIERS: SwitchTier[] = [
  'tor',
  't1',
  't2',
  't3',
];
|
|
|
|
/**
 * Creates the network summary for a data center with no switches:
 * no tiers, no pending repairs, nothing disconnected or degraded, and full bandwidth.
 * A fresh object (with fresh nested containers) is returned on every call.
 */
export function emptyDCNetworkSummary(): DCNetworkSummary {
  const blank: DCNetworkSummary = {
    totalByTier: {},
    healthyByTier: {},
    repairBatches: [],
    networkRackCount: 0,
    racksDisconnected: 0,
    racksDegraded: 0,
    averageBandwidth: 1,
    effectiveFlopsFraction: 1,
  };
  return blank;
}
|
|
|
|
/** Creates a campus network summary with no T4 switches and full cross-DC bandwidth. */
export function emptyCampusNetworkSummary(): CampusNetworkSummary {
  const blank: CampusNetworkSummary = {
    totalT4: 0,
    healthyT4: 0,
    crossDCBandwidth: 1,
  };
  return blank;
}
|
|
|
|
/** Creates a cluster network summary with no T5 switches and full cross-campus bandwidth. */
export function emptyClusterNetworkSummary(): ClusterNetworkSummary {
  const blank: ClusterNetworkSummary = {
    totalT5: 0,
    healthyT5: 0,
    crossCampusBandwidth: 1,
  };
  return blank;
}
|
|
|
|
/**
 * Derives how many switches of each in-DC tier are needed for `computeRackCount` racks.
 *
 * One `tor` entry is counted per compute rack; `t1` and `t2` are sized by dividing
 * the tier below by the configured fan-out (rounded up); `t3` is a fixed count
 * looked up by data-center tier.
 *
 * @returns An empty record when there are no compute racks.
 */
function computeTopologyCounts(
  computeRackCount: number,
  dcTier: DCTier,
): Partial<Record<SwitchTier, number>> {
  if (computeRackCount <= 0) return {};

  const t1 = Math.ceil(computeRackCount / SWITCH_TIER_CONFIGS.t1.fanOut);
  const counts: Partial<Record<SwitchTier, number>> = {
    tor: computeRackCount,
    t1,
    t2: Math.ceil(t1 / SWITCH_TIER_CONFIGS.t2.fanOut),
    t3: T3_COUNT_PER_DC_TIER[dcTier],
  };
  return counts;
}
|
|
|
|
/**
 * Builds a fully healthy network summary for a data center holding
 * `computeRackCount` compute racks.
 *
 * Every tier starts with all switches healthy, no repairs pending and full
 * bandwidth. With zero (or fewer) racks the empty summary is returned.
 */
export function buildDCNetworkSummary(
  computeRackCount: number,
  dcTier: DCTier,
): DCNetworkSummary {
  if (computeRackCount <= 0) return emptyDCNetworkSummary();

  const totals = computeTopologyCounts(computeRackCount, dcTier);
  return {
    // Start from the pristine defaults and fill in the topology-dependent fields.
    ...emptyDCNetworkSummary(),
    totalByTier: totals,
    healthyByTier: { ...totals },
    networkRackCount: estimateNetworkSlots(computeRackCount, dcTier),
  };
}
|
|
|
|
/**
 * Grows an existing DC network to accommodate `addedRacks` more compute racks.
 *
 * The topology is re-derived for the new rack count. Any switches added to a
 * tier are counted as healthy, while switches that were already failed stay
 * failed. Pending repair batches are carried over unchanged and bandwidth
 * figures are recomputed (without any redundancy bonus).
 *
 * @returns `existing` itself when `addedRacks <= 0`.
 */
export function expandDCNetwork(
  existing: DCNetworkSummary,
  addedRacks: number,
  dcTier: DCTier,
): DCNetworkSummary {
  if (addedRacks <= 0) return existing;

  const torAfter = (existing.totalByTier.tor ?? 0) + addedRacks;
  const totalsAfter = computeTopologyCounts(torAfter, dcTier);

  const healthyAfter: Partial<Record<SwitchTier, number>> = {};
  DC_TIERS.forEach(tier => {
    const growth = (totalsAfter[tier] ?? 0) - (existing.totalByTier[tier] ?? 0);
    healthyAfter[tier] = (existing.healthyByTier[tier] ?? 0) + Math.max(0, growth);
  });

  return recomputeBandwidth({
    ...existing,
    totalByTier: totalsAfter,
    healthyByTier: healthyAfter,
    networkRackCount: estimateNetworkSlots(torAfter, dcTier),
  });
}
|
|
|
|
/**
 * Shrinks an existing DC network after `removedRacks` compute racks leave it.
 *
 * The topology is re-derived for the smaller rack count and each tier's healthy
 * count is capped at its new total. Repair batches are kept only for tiers that
 * still have at least one unhealthy switch afterwards. Bandwidth figures are
 * recomputed (without any redundancy bonus).
 *
 * @returns `existing` itself when `removedRacks <= 0`, or the empty summary
 *          when no racks remain.
 */
export function shrinkDCNetwork(
  existing: DCNetworkSummary,
  removedRacks: number,
  dcTier: DCTier,
): DCNetworkSummary {
  if (removedRacks <= 0) return existing;

  const torAfter = Math.max(0, (existing.totalByTier.tor ?? 0) - removedRacks);
  if (torAfter === 0) return emptyDCNetworkSummary();

  const totalsAfter = computeTopologyCounts(torAfter, dcTier);

  const healthyAfter: Partial<Record<SwitchTier, number>> = {};
  DC_TIERS.forEach(tier => {
    healthyAfter[tier] = Math.min(existing.healthyByTier[tier] ?? 0, totalsAfter[tier] ?? 0);
  });

  // A batch is only still meaningful if its tier has something left to repair.
  const stillRelevant = existing.repairBatches.filter(
    batch => (healthyAfter[batch.tier] ?? 0) < (totalsAfter[batch.tier] ?? 0),
  );

  return recomputeBandwidth({
    ...existing,
    totalByTier: totalsAfter,
    healthyByTier: healthyAfter,
    repairBatches: stillRelevant,
    networkRackCount: estimateNetworkSlots(torAfter, dcTier),
  });
}
|
|
|
|
/**
 * Returns the DC-wide bandwidth fraction in the range up to 1: the worst
 * per-tier ratio of `(healthy + redundancyBonus) / total`, each capped at 1.
 * Tiers with no switches are ignored; with no populated tiers the result is 1.
 */
function computeAggregateBandwidth(
  summary: DCNetworkSummary,
  redundancyBonus: number,
): number {
  return DC_TIERS.reduce((worst, tier) => {
    const installed = summary.totalByTier[tier] ?? 0;
    if (installed === 0) return worst;

    const working = summary.healthyByTier[tier] ?? 0;
    const fraction = Math.min(1, (working + redundancyBonus) / installed);
    return fraction < worst ? fraction : worst;
  }, 1);
}
|
|
|
|
/**
 * Returns a copy of `summary` with its derived bandwidth fields refreshed.
 *
 * - `averageBandwidth` / `effectiveFlopsFraction`: the aggregate bandwidth fraction.
 * - `racksDisconnected`: every rack when bandwidth is 0, otherwise the number of
 *   unhealthy `tor` switches.
 * - `racksDegraded`: when bandwidth is strictly between 0 and 1, the rounded-up
 *   share of racks affected by the bandwidth loss minus those already counted
 *   as disconnected; otherwise 0.
 *
 * Both rack counts are clamped to be non-negative.
 *
 * @param redundancyBonus Extra healthy switches credited to each tier (default 0).
 */
function recomputeBandwidth(summary: DCNetworkSummary, redundancyBonus = 0): DCNetworkSummary {
  const bandwidth = computeAggregateBandwidth(summary, redundancyBonus);
  const torInstalled = summary.totalByTier.tor ?? 0;
  const torDown = torInstalled - (summary.healthyByTier.tor ?? 0);

  const cutOff = bandwidth === 0 ? torInstalled : torDown;

  let slowed = 0;
  if (bandwidth > 0 && bandwidth < 1) {
    slowed = Math.ceil(torInstalled * (1 - bandwidth)) - cutOff;
  }

  return {
    ...summary,
    averageBandwidth: bandwidth,
    effectiveFlopsFraction: bandwidth,
    racksDisconnected: Math.max(0, cutOff),
    racksDegraded: Math.max(0, slowed),
  };
}
|
|
|
|
/**
 * Runs one tick of the aggregate network model for a single data center:
 * samples new switch failures per tier, then advances all repair batches.
 *
 * Failures: for every tier with healthy switches, the number of failures is
 * sampled from the tier's `failureRatePerTick` scaled by
 * `(1 - networkResearchBonus)`. Each failure group becomes a repair batch and
 * is charged `baseCost * SWITCH_REPAIR_COST_FRACTION` per failed switch.
 * Notifications are raised for `t3` and `t2` failures only.
 *
 * Repairs: every batch (including ones created this tick) ticks down by
 * `1 + opsEff * 0.05`. Finished batches return their switches to the healthy
 * pool, capped at the tier's total.
 *
 * The input `summary` is never mutated; it typically belongs to the previous
 * game state, so all changes are made on copies.
 *
 * @param summary Current network summary of the data center.
 * @param networkResearchBonus Fractional reduction applied to failure rates.
 * @param opsEff Operations effectiveness; speeds up repairs.
 * @param repairSpeedBonus Fractional reduction of the base repair time.
 * @param hotStandbyTicks When > 0, overrides the repair time of new batches.
 * @param redundancyBonus Extra healthy switches credited per tier when bandwidth is recomputed.
 * @returns The updated summary, the repair costs incurred, and any notifications.
 */
function processNetworkForDC(
  summary: DCNetworkSummary,
  networkResearchBonus: number,
  opsEff: number,
  repairSpeedBonus: number,
  hotStandbyTicks: number,
  redundancyBonus: number,
): { summary: DCNetworkSummary; costs: number; notifications: TickNotification[] } {
  const torTotal = summary.totalByTier.tor ?? 0;
  if (torTotal === 0) return { summary, costs: 0, notifications: [] };

  let costs = 0;
  const notifications: TickNotification[] = [];
  const healthyByTier = { ...summary.healthyByTier };
  // Copy before appending: `summary.repairBatches` is shared by reference with the
  // caller's (previous) state, so pushing onto it directly would corrupt that state.
  const pendingBatches: RepairBatch[] = [...summary.repairBatches];
  let dirty = false;

  for (const tier of DC_TIERS) {
    const healthy = healthyByTier[tier] ?? 0;
    if (healthy <= 0) continue;

    const tierConfig = SWITCH_TIER_CONFIGS[tier];
    const rate = tierConfig.failureRatePerTick * (1 - networkResearchBonus);
    const failed = binomialSample(healthy, rate);
    if (failed <= 0) continue;

    healthyByTier[tier] = healthy - failed;
    // Hot standby replaces the normal repair duration entirely.
    const repairTime = hotStandbyTicks > 0
      ? hotStandbyTicks
      : tierConfig.repairBaseTicks * (1 - repairSpeedBonus);
    pendingBatches.push({ tier, count: failed, ticksRemaining: repairTime });
    costs += tierConfig.baseCost * SWITCH_REPAIR_COST_FRACTION * failed;
    dirty = true;

    if (tier === 't3') {
      notifications.push({ title: 'Core Network Failure', message: `Tier-3 core switch failed — potential DC disconnect!`, type: 'danger' });
    } else if (tier === 't2') {
      notifications.push({ title: 'Network Switch Failure', message: `Tier-2 spine switch failed — racks may be degraded.`, type: 'warning' });
    }
  }

  const repairProgressPerTick = 1 + opsEff * 0.05;
  const remainingBatches: RepairBatch[] = [];
  for (const batch of pendingBatches) {
    const newTicks = batch.ticksRemaining - repairProgressPerTick;
    if (newTicks <= 0) {
      // Repair finished: restore the switches, never exceeding the tier's total.
      healthyByTier[batch.tier] = Math.min(
        summary.totalByTier[batch.tier] ?? 0,
        (healthyByTier[batch.tier] ?? 0) + batch.count,
      );
      dirty = true;
    } else {
      remainingBatches.push({ ...batch, ticksRemaining: newTicks });
    }
  }

  // Nothing failed and nothing was repaired: only the batch timers moved.
  if (!dirty) return { summary: { ...summary, repairBatches: remainingBatches }, costs, notifications };

  const updated: DCNetworkSummary = {
    ...summary,
    healthyByTier,
    repairBatches: remainingBatches,
  };
  return { summary: recomputeBandwidth(updated, redundancyBonus), costs, notifications };
}
|
|
|
|
// --- Interconnect Training Multiplier ---
|
|
|
|
/**
 * Additive training-scaling bonus contributed by each intra-node interconnect.
 * Consumed by `computeInterconnectMultiplier`, where it is added to a 0.6 base.
 */
const INTRA_NODE_BONUS: Record<IntraNodeInterconnect, number> = {
  'pcie-gen4': 0,
  'pcie-gen5': 0.05,
  'nvlink-3': 0.15,
  'nvlink-4': 0.25,
  'nvlink-5': 0.35,
  'nvlink-domain': 0.5,
  'infinity-fabric': 0.1,
  'custom-mesh': 0.4,
};
|
|
|
|
/**
 * Training-throughput multiplier for a group of racks working together.
 *
 * A single rack (or none) is not penalised and gets 1.0. Multiple racks start
 * from a 0.6 base, gain the SKU's intra-node interconnect bonus and the network
 * fabric's training scaling bonus, and are capped at 1.0.
 *
 * @param sku Rack SKU; only `intraNodeInterconnect` is read.
 * @param rackCount Number of racks participating.
 * @param fabric Network fabric of the data center.
 */
function computeInterconnectMultiplier(
  sku: RackSkuConfig,
  rackCount: number,
  fabric: NetworkFabric,
): number {
  if (rackCount <= 1) return 1.0;

  const intraBonus = INTRA_NODE_BONUS[sku.intraNodeInterconnect] ?? 0;
  const { trainingScalingBonus } = NETWORK_FABRIC_CONFIGS[fabric];
  const uncapped = 0.6 + intraBonus + trainingScalingBonus;
  return Math.min(1.0, uncapped);
}
|
|
|
|
// --- Main Infrastructure Tick ---
|
|
|
|
/**
 * Advances the whole infrastructure hierarchy (clusters → campuses → data centers)
 * by one tick and recomputes the global totals.
 *
 * For each operational data center, in this order:
 * 1. advance an in-progress retrofit (and return early while retrofitting);
 * 2. advance deployment cohorts, sampling test failures when a cohort finishes testing;
 * 3. grow the network for racks that just came online;
 * 4. sample production rack failures (failed racks go to repair, network shrinks);
 * 5. run the per-DC network failure/repair model;
 * 6. send racks whose connectivity was restored back through testing;
 * 7. recompute power, FLOPs, VRAM, costs and uptime.
 *
 * Afterwards each campus services its retrofit queue, starting queued DCs as
 * concurrency slots free up.
 *
 * Clusters, campuses and data centers that are still constructing only advance
 * their construction counter; they (and, for clusters/campuses, their children)
 * are not included in this tick's totals. The same applies to data centers that
 * are mid-retrofit.
 *
 * Uses `Math.random` (via `binomialSample`) and `Date.now()` (for generated
 * cohort ids), so results are not deterministic.
 *
 * @param state Current game state.
 * @param researchBonuses Optional research-derived modifiers (pipeline speed, energy cost).
 * @returns The updated infrastructure, notifications raised, and total repair costs.
 */
export function processInfrastructure(state: GameState, researchBonuses?: ResearchBonuses): InfraTickResult {
  const notifications: TickNotification[] = [];
  let repairCosts = 0;

  const engEff = state.talent.departments.engineering.effectiveness;
  const opsEff = state.talent.departments.operations.effectiveness;

  // Research-derived modifiers.
  const completed = state.research.completedResearch;
  const qaResearchBonus = completed.includes('quality-assurance') ? 0.25 : 0;
  const netResearch1 = completed.includes('network-engineering-i') ? 0.4 : 0;
  const netResearch2 = completed.includes('network-engineering-ii') ? 0.5 : 0;
  const networkResearchBonus = Math.min(0.8, netResearch1 + netResearch2);
  const repairSpeedBonus = completed.includes('network-fast-repair') ? 0.4 : 0;
  const hotStandbyTicks = completed.includes('network-hot-standby') ? 5 : 0;
  const redundancyBonus = completed.includes('network-redundancy') ? 1 : 0;

  // Global accumulators, filled in while walking the hierarchy.
  let totalFlops = 0;
  let totalTrainingFlops = 0;
  let totalInferenceFlops = 0;
  let totalVramGB = 0;
  let totalUptime = 0;
  let totalRackCount = 0;
  let totalComputeRackCount = 0;
  let totalDataCenterCount = 0;
  let dcWithRacks = 0;
  let globalLatencyPenalty = 0;
  let latencyDCCount = 0;

  const clusters: Cluster[] = state.infrastructure.clusters.map(cluster => {
    if (cluster.status === 'constructing') {
      const built = cluster.constructionProgress + 1;
      if (built >= cluster.constructionTotal) {
        notifications.push({
          title: 'Cluster Online',
          message: `${cluster.name} cluster in ${LOCATION_CONFIGS[cluster.locationId].name} is now operational!`,
          type: 'success',
        });
        return { ...cluster, constructionProgress: cluster.constructionTotal, status: 'operational' as const };
      }
      return { ...cluster, constructionProgress: built };
    }

    const campuses: Campus[] = cluster.campuses.map(campus => {
      if (campus.status === 'constructing') {
        const built = campus.constructionProgress + 1;
        if (built >= campus.constructionTotal) {
          notifications.push({ title: 'Campus Ready', message: `Campus ${campus.name} is now operational!`, type: 'success' });
          return { ...campus, constructionProgress: campus.constructionTotal, status: 'operational' as const };
        }
        return { ...campus, constructionProgress: built };
      }

      const dataCenters: DataCenter[] = campus.dataCenters.map(dc => {
        if (dc.status === 'constructing') {
          const built = dc.constructionProgress + 1;
          if (built >= dc.constructionTotal) {
            notifications.push({ title: 'Data Center Online', message: `${dc.name} is now operational!`, type: 'success' });
            return { ...dc, constructionProgress: dc.constructionTotal, status: 'operational' as const };
          }
          return { ...dc, constructionProgress: built };
        }

        // --- Retrofit: decommission the old racks, then install the new SKU ---
        if (dc.status === 'retrofitting' && dc.retrofitState) {
          const retrofit = {
            ...dc.retrofitState,
            progress: dc.retrofitState.progress + (1 + opsEff * 0.1),
          };

          if (retrofit.progress >= retrofit.total) {
            if (retrofit.phase === 'decommissioning') {
              // Old racks are gone: reset the DC and queue the replacement racks for installation.
              const installTotal = cohortStageTotal('installation', retrofit.toSkuId, retrofit.racksRemaining);
              return {
                ...dc,
                computeRacksOnline: 0,
                computeRacksFailed: 0,
                rackSkuId: retrofit.toSkuId,
                deploymentCohorts: [{
                  id: `retrofit-${dc.id}-${Date.now()}`,
                  count: retrofit.racksRemaining,
                  skuId: retrofit.toSkuId,
                  stage: 'installation' as PipelineStage,
                  stageProgress: 0,
                  stageTotal: installTotal,
                  repairCount: 0,
                }],
                retrofitState: { ...retrofit, phase: 'installing' as const, progress: 0, total: installTotal },
                networkSummary: emptyDCNetworkSummary(),
                effectiveComputeRacks: 0,
                usedSlots: 0,
                usedPowerKW: 0,
                currentUptime: 0,
                energyCostPerTick: DC_TIER_CONFIGS[dc.tier].baseEnergyCostPerTick * LOCATION_CONFIGS[cluster.locationId].energyCostMultiplier,
                maintenanceCostPerTick: 0,
              };
            }

            notifications.push({ title: 'Retrofit Complete', message: `${dc.name} retrofit to ${RACK_SKU_CONFIGS[retrofit.toSkuId].name} is complete!`, type: 'success' });
            return { ...dc, status: 'operational' as const, retrofitState: null };
          }

          return { ...dc, retrofitState: retrofit };
        }

        let computeRacksOnline = dc.computeRacksOnline;
        let dcRepairCosts = 0;

        // --- Deployment cohorts ---
        const nextCohorts: DeploymentCohort[] = [];
        let newlyOnline = 0;

        for (const cohort of dc.deploymentCohorts) {
          // network-down cohorts are carried over untouched; they never progress here.
          if (cohort.stage === 'network-down') {
            nextCohorts.push(cohort);
            continue;
          }

          const baseSpeed = stageSpeed(cohort.stage, engEff, opsEff);
          // Pipeline research speeds up every stage except repair.
          const pipelineBonus = cohort.stage !== 'repair' ? (researchBonuses?.pipelineSpeedBonus ?? 0) : 0;
          const advanced = cohort.stageProgress + baseSpeed * (1 + pipelineBonus);

          if (advanced < cohort.stageTotal) {
            nextCohorts.push({ ...cohort, stageProgress: advanced });
            continue;
          }

          // Stage finished. Decommissioned racks simply disappear.
          if (cohort.stage === 'decommission') continue;

          // Repaired racks must pass testing again.
          if (cohort.stage === 'repair') {
            nextCohorts.push({
              ...cohort,
              stage: 'testing',
              stageProgress: 0,
              stageTotal: cohortStageTotal('testing', cohort.skuId, cohort.count),
            });
            continue;
          }

          const upcoming = nextStage(cohort.stage);
          if (upcoming !== 'production') {
            nextCohorts.push({
              ...cohort,
              stage: upcoming,
              stageProgress: 0,
              stageTotal: cohortStageTotal(upcoming, cohort.skuId, cohort.count),
            });
            continue;
          }

          // Testing finished: some racks may fail and go to repair, the rest come online.
          const cohortSku = RACK_SKU_CONFIGS[cohort.skuId];
          const testFailRate = cohortSku.testFailureRate
            * (1 - dc.coolingLevel * COOLING_FAILURE_REDUCTION)
            * (1 - opsEff * 0.2)
            * (1 - qaResearchBonus);
          const failedTests = binomialSample(cohort.count, testFailRate);
          newlyOnline += cohort.count - failedTests;

          if (failedTests > 0) {
            dcRepairCosts += cohortSku.baseCost * cohortSku.repairCostFraction * failedTests;
            nextCohorts.push({
              id: `repair-${cohort.id}`,
              count: failedTests,
              skuId: cohort.skuId,
              stage: 'repair',
              stageProgress: 0,
              stageTotal: cohortStageTotal('repair', cohort.skuId, failedTests),
              repairCount: cohort.repairCount + 1,
            });
          }
        }

        computeRacksOnline += newlyOnline;

        // --- Grow the network topology for racks that just came online ---
        let networkSummary = dc.networkSummary;
        if (newlyOnline > 0) {
          const hasNetwork = (networkSummary.totalByTier.tor ?? 0) !== 0;
          networkSummary = hasNetwork
            ? expandDCNetwork(networkSummary, newlyOnline, dc.tier)
            : buildDCNetworkSummary(computeRacksOnline, dc.tier);
        }

        // --- Production failures ---
        if (computeRacksOnline > 0 && dc.rackSkuId) {
          const prodSku = RACK_SKU_CONFIGS[dc.rackSkuId];
          const prodFailRate = prodSku.productionFailureRate
            * (1 - dc.coolingLevel * COOLING_FAILURE_REDUCTION)
            * (1 - dc.redundancyLevel * REDUNDANCY_FAILURE_REDUCTION);
          const prodFailures = binomialSample(computeRacksOnline, prodFailRate);
          if (prodFailures > 0) {
            computeRacksOnline -= prodFailures;
            dcRepairCosts += prodSku.baseCost * prodSku.repairCostFraction * prodFailures;
            nextCohorts.push({
              id: `prodfail-${dc.id}-${Date.now()}`,
              count: prodFailures,
              skuId: dc.rackSkuId,
              stage: 'repair',
              stageProgress: 0,
              stageTotal: cohortStageTotal('repair', dc.rackSkuId, prodFailures),
              repairCount: 0,
            });
            networkSummary = shrinkDCNetwork(networkSummary, prodFailures, dc.tier);
          }
        }

        repairCosts += dcRepairCosts;

        // --- Per-DC network failures and repairs (aggregate model) ---
        const netResult = processNetworkForDC(
          networkSummary,
          networkResearchBonus,
          opsEff,
          repairSpeedBonus,
          hotStandbyTicks,
          redundancyBonus,
        );
        networkSummary = netResult.summary;
        repairCosts += netResult.costs;
        if (netResult.notifications.length > 0) notifications.push(...netResult.notifications);

        // --- Recovery: racks that regained connectivity are re-tested before serving again ---
        const disconnectedBefore = dc.networkSummary.racksDisconnected;
        const disconnectedNow = networkSummary.racksDisconnected;
        if (disconnectedNow < disconnectedBefore && dc.rackSkuId) {
          const recovered = disconnectedBefore - disconnectedNow;
          computeRacksOnline -= recovered;
          networkSummary = shrinkDCNetwork(networkSummary, recovered, dc.tier);
          nextCohorts.push({
            id: `netrecovery-${dc.id}-${Date.now()}`,
            count: recovered,
            skuId: dc.rackSkuId,
            stage: 'testing',
            stageProgress: 0,
            stageTotal: cohortStageTotal('testing', dc.rackSkuId, recovered),
            repairCount: 0,
          });
        }

        // --- DC aggregates ---
        const effectiveComputeRacks = Math.max(0, computeRacksOnline - networkSummary.racksDisconnected);
        const location = LOCATION_CONFIGS[cluster.locationId];
        const tierConfig = DC_TIER_CONFIGS[dc.tier];

        let pipelineRacks = 0;
        let computeRacksFailed = 0;
        for (const pending of nextCohorts) {
          if (pending.stage !== 'decommission') pipelineRacks += pending.count;
          if (pending.stage === 'repair') computeRacksFailed += pending.count;
        }

        const totalRacksInDc = computeRacksOnline + pipelineRacks;
        const netSlots = networkSummary.networkRackCount;
        const usedSlots = computeRacksOnline + pipelineRacks + netSlots;

        let usedPowerKW = 0;
        let dcFlops = 0;
        let dcTrainingFlops = 0;
        let dcInferenceFlops = 0;
        let dcTotalVramGB = 0;
        if (dc.rackSkuId && computeRacksOnline > 0) {
          const onlineSku = RACK_SKU_CONFIGS[dc.rackSkuId];
          const bwFraction = networkSummary.effectiveFlopsFraction;
          const interconnectMult = computeInterconnectMultiplier(onlineSku, effectiveComputeRacks, dc.networkFabric);

          // Power and VRAM count every online rack; FLOPs only count connected ones.
          usedPowerKW = computeRacksOnline * onlineSku.powerDrawKW;
          dcTrainingFlops = effectiveComputeRacks * onlineSku.trainingFlops * bwFraction * interconnectMult;
          dcInferenceFlops = effectiveComputeRacks * onlineSku.inferenceFlops * bwFraction;
          dcTotalVramGB = computeRacksOnline * onlineSku.totalVramGB;
          dcFlops = dcTrainingFlops + dcInferenceFlops;
        }

        const pue = COOLING_TYPE_CONFIGS[dc.coolingType].pueMultiplier;
        const energyReduction = researchBonuses?.energyCostReduction ?? 0;
        const energyCostPerTick = (tierConfig.baseEnergyCostPerTick + usedPowerKW * BASE_ENERGY_COST_PER_FLOP)
          * location.energyCostMultiplier * pue * (1 - energyReduction);
        const maintenanceCostPerTick = totalRacksInDc * BASE_MAINTENANCE_PER_RACK;
        const currentUptime = totalRacksInDc > 0 ? effectiveComputeRacks / totalRacksInDc : 1;

        // Degraded bandwidth translates into a latency penalty for this DC.
        if (networkSummary.averageBandwidth < 1 && computeRacksOnline > 0) {
          globalLatencyPenalty += (1 - networkSummary.averageBandwidth) * NETWORK_DEGRADATION.bandwidthToLatencyPenalty;
          latencyDCCount++;
        }

        totalFlops += dcFlops;
        totalTrainingFlops += dcTrainingFlops;
        totalInferenceFlops += dcInferenceFlops;
        totalVramGB += dcTotalVramGB;
        totalRackCount += totalRacksInDc + netSlots;
        totalComputeRackCount += totalRacksInDc;
        totalDataCenterCount++;
        if (totalRacksInDc > 0) {
          totalUptime += currentUptime;
          dcWithRacks++;
        }

        return {
          ...dc,
          computeRacksOnline,
          computeRacksFailed,
          deploymentCohorts: nextCohorts,
          networkSummary,
          effectiveComputeRacks,
          usedSlots,
          usedPowerKW,
          energyCostPerTick,
          maintenanceCostPerTick,
          currentUptime,
          dcTrainingFlops,
          dcInferenceFlops,
          dcTotalVramGB,
        };
      });

      // --- Campus retrofit queue ---
      let finalDCs = dataCenters;
      const existingQueue: CampusRetrofitQueue | null = campus.retrofitQueue ?? null;
      let updatedQueue: CampusRetrofitQueue | null = existingQueue;

      if (existingQueue && existingQueue.pendingDCIds.length + existingQueue.activeDCIds.length > 0) {
        const queue: CampusRetrofitQueue = { ...existingQueue };

        // Active DCs that are operational again have finished their retrofit.
        const finishedIds = finalDCs
          .filter(dc => queue.activeDCIds.includes(dc.id) && dc.status === 'operational')
          .map(dc => dc.id);
        if (finishedIds.length > 0) {
          queue.activeDCIds = queue.activeDCIds.filter(id => !finishedIds.includes(id));
          queue.completedDCIds = [...queue.completedDCIds, ...finishedIds];
        }

        // Start as many pending DCs as there are free concurrency slots.
        const slotsAvailable = queue.maxConcurrent - queue.activeDCIds.length;
        if (slotsAvailable > 0 && queue.pendingDCIds.length > 0) {
          const toStart = queue.pendingDCIds.slice(0, slotsAvailable);
          queue.pendingDCIds = queue.pendingDCIds.slice(toStart.length);
          queue.activeDCIds = [...queue.activeDCIds, ...toStart];

          finalDCs = finalDCs.map(dc => {
            if (!toStart.includes(dc.id)) return dc;
            if (dc.status !== 'operational' || !dc.rackSkuId) return dc;

            let racksInPipeline = 0;
            for (const pending of dc.deploymentCohorts) {
              if (pending.stage !== 'decommission') racksInPipeline += pending.count;
            }
            const totalRacks = dc.computeRacksOnline + racksInPipeline;
            if (totalRacks <= 0) return dc;

            const oldSku = RACK_SKU_CONFIGS[dc.rackSkuId as RackSkuId];
            const decommTicks = Math.ceil(oldSku.pipelineTimeTicks.installation * (1 + COHORT_SCALE_FACTOR * totalRacks));
            return {
              ...dc,
              status: 'retrofitting' as const,
              deploymentCohorts: [],
              networkSummary: emptyDCNetworkSummary(),
              retrofitState: {
                fromSkuId: dc.rackSkuId as RackSkuId,
                toSkuId: queue.targetSkuId,
                phase: 'decommissioning' as const,
                progress: 0,
                total: decommTicks,
                racksRemaining: totalRacks,
              },
            };
          });
        }

        const queueDrained = queue.pendingDCIds.length === 0 && queue.activeDCIds.length === 0;
        if (queueDrained) {
          notifications.push({
            title: 'Campus Retrofit Complete',
            message: `All DCs in ${campus.name} have been retrofitted to ${RACK_SKU_CONFIGS[queue.targetSkuId].name}!`,
            type: 'success',
          });
          updatedQueue = null;
        } else {
          updatedQueue = queue;
        }
      }

      return { ...campus, dataCenters: finalDCs, retrofitQueue: updatedQueue };
    });

    return { ...cluster, campuses };
  });

  // The latency penalty is averaged over the DCs that actually had degraded bandwidth.
  const avgLatencyPenalty = latencyDCCount > 0 ? globalLatencyPenalty / latencyDCCount : 0;

  return {
    infrastructure: {
      clusters,
      totalFlops,
      totalTrainingFlops,
      totalInferenceFlops,
      totalVramGB,
      totalUptime: dcWithRacks > 0 ? totalUptime / dcWithRacks : 1,
      totalRackCount,
      totalComputeRackCount,
      totalDataCenterCount,
      networkLatencyPenalty: avgLatencyPenalty,
    },
    notifications,
    repairCosts,
  };
}
|