Rework network to 6-tier Clos topology with individual switch entities
CI / build-and-push (push) Successful in 31s

Replace aggregate network health stats with a full 6-tier Clos topology
(ToR → T1 → T2 → T3 → T4 → T5) where every switch is an individually
tracked entity with uplinks, repair pipelines, and failure cascades.

Key mechanics:
- Bottleneck bandwidth model (min along path) affects FLOPS and satisfaction
- Rack-down on full disconnect → racks re-enter the testing pipeline on recovery
- Binomial failure sampling per tier, dirty-flag cascade optimization
- Flat switch registry for performance at scale
- Three new research nodes: network-redundancy, fast-repair, hot-standby

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 01:33:59 -04:00
parent f8d7a25c6e
commit 54220fca70
9 changed files with 725 additions and 284 deletions
+68 -34
View File
@@ -14,7 +14,8 @@ import {
formatMoney, formatNumber, formatPercent,
LOCATION_CONFIGS, DC_TIER_CONFIGS, RACK_SKU_CONFIGS,
CAMPUS_TIER_COSTS, CLUSTER_COST_CONFIG, FIRST_CAMPUS_BUILD_TICKS,
networkSlotsRequired, maxComputeRacks,
estimateNetworkSlots, maxComputeRacks,
SWITCH_TIER_CONFIGS,
DC_UPGRADE_COST_FRACTION, DC_UPGRADE_INCREMENT,
} from '@ai-tycoon/shared';
import type {
@@ -26,12 +27,14 @@ const ERA_ORDER: Era[] = ['startup', 'scaleup', 'bigtech', 'agi'];
const STAGE_LABELS: Record<PipelineStage, string> = {
ordered: 'Ordered', manufacturing: 'Mfg', receiving: 'Recv',
installation: 'Install', testing: 'Testing', repair: 'Repair', decommission: 'Decom',
installation: 'Install', testing: 'Testing', repair: 'Repair',
'network-down': 'Net Down', decommission: 'Decom',
};
const STAGE_COLORS: Record<PipelineStage, string> = {
ordered: 'bg-surface-600', manufacturing: 'bg-blue-500', receiving: 'bg-cyan-500',
installation: 'bg-violet-500', testing: 'bg-amber-500', repair: 'bg-danger', decommission: 'bg-surface-500',
installation: 'bg-violet-500', testing: 'bg-amber-500', repair: 'bg-danger',
'network-down': 'bg-red-600', decommission: 'bg-surface-500',
};
// ─── Shared Components ──────────────────────────────────────────
@@ -112,7 +115,7 @@ function Breadcrumb({ nav }: { nav: InfraNav }) {
function DeploymentProgressBar({ dc }: { dc: DataCenter }) {
const tierConfig = DC_TIER_CONFIGS[dc.tier];
const maxCompute = maxComputeRacks(tierConfig.rackSlots);
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
const pipelineRacks = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((s, c) => s + c.count, 0);
const totalTarget = dc.computeRacksOnline + pipelineRacks;
const pct = totalTarget > 0 ? (dc.computeRacksOnline / totalTarget) * 100 : 0;
@@ -157,23 +160,27 @@ function CohortStageBreakdown({ cohorts }: { cohorts: DeploymentCohort[] }) {
}
function NetworkHealthIndicator({ dc }: { dc: DataCenter }) {
const nh = dc.networkHealth;
if (nh.tier1Required === 0) return null;
const ns = dc.networkSummary;
if (ns.switchIds.length === 0) return null;
const allHealthy = nh.tier1Healthy === nh.tier1Required
&& nh.tier2Healthy === nh.tier2Required
&& nh.tier3Healthy === nh.tier3Required;
const hasDisconnected = ns.racksDisconnected > 0;
const hasDegraded = ns.racksDegraded > 0;
const coreDown = (ns.healthyByTier?.t3 ?? 0) < (ns.totalByTier?.t3 ?? 0);
const color = nh.tier3Healthy < nh.tier3Required ? 'text-danger'
: !allHealthy ? 'text-amber-400'
const color = coreDown ? 'text-danger'
: hasDisconnected ? 'text-danger'
: hasDegraded ? 'text-amber-400'
: 'text-green-400';
const bwPct = Math.round(ns.averageBandwidth * 100);
return (
<div className={`flex items-center gap-1 text-xs ${color}`}>
<Network size={12} />
<span>
{nh.tier3Healthy < nh.tier3Required ? 'Core Down'
: !allHealthy ? `${nh.racksDisconnected} disconnected`
{coreDown ? 'Core Down'
: hasDisconnected ? `${ns.racksDisconnected} disconnected`
: hasDegraded ? `${bwPct}% bandwidth`
: 'Healthy'}
</span>
</div>
@@ -661,7 +668,7 @@ function FillAllDCsModal({ campus, money, era, research, onConfirm, onClose }: {
const { qty, cost } = computeFillForDC(dc, selectedSku, remaining);
if (qty === 0) {
const tierConfig = DC_TIER_CONFIGS[dc.tier];
const maxCompute = maxComputeRacks(tierConfig.rackSlots);
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0);
const isFull = maxCompute - (dc.computeRacksOnline + pipelineCount) <= 0;
return { dc, qty: 0, cost: 0, reason: isFull ? 'Already full' : 'No budget' };
@@ -929,7 +936,7 @@ function CampusDetailView({ clusterId, campusId }: { clusterId: string; campusId
const hasRetrofitQueue = !!campus.retrofitQueue;
const fillableDCs = operationalDCs.filter(dc => {
const maxCompute = maxComputeRacks(tierConfig.rackSlots);
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0);
return maxCompute - (dc.computeRacksOnline + pipelineCount) > 0;
});
@@ -1124,12 +1131,12 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: {
if (!dc || !cluster) return <div className="text-surface-400">Data center not found.</div>;
const tierConfig = DC_TIER_CONFIGS[dc.tier];
const maxCompute = maxComputeRacks(tierConfig.rackSlots);
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((s, c) => s + c.count, 0);
const existingCompute = dc.computeRacksOnline + pipelineCount;
const availableSlots = maxCompute - existingCompute;
const sku = dc.rackSkuId ? RACK_SKU_CONFIGS[dc.rackSkuId] : null;
const netSlots = networkSlotsRequired(existingCompute);
const netSlots = estimateNetworkSlots(existingCompute, dc.tier);
const availableSkus = Object.values(RACK_SKU_CONFIGS).filter(s => {
if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false;
@@ -1257,7 +1264,7 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: {
{(() => {
const skuToUse = dc.rackSkuId ?? selectedSku!;
const skuConfig = RACK_SKU_CONFIGS[skuToUse];
const newNetSlots = networkSlotsRequired(existingCompute + deployQty);
const newNetSlots = estimateNetworkSlots(existingCompute + deployQty, dc.tier);
const addedNet = newNetSlots - netSlots;
const totalCost = skuConfig.baseCost * deployQty;
return (
@@ -1358,24 +1365,51 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: {
<p className="text-sm text-surface-400">No racks online. Deploy racks to see network topology.</p>
) : (
<div className="space-y-3">
{[
{ label: 'Tier-1 (ToR)', required: dc.networkHealth.tier1Required, healthy: dc.networkHealth.tier1Healthy, desc: `1 per ${24} compute racks` },
{ label: 'Tier-2 (Aggr)', required: dc.networkHealth.tier2Required, healthy: dc.networkHealth.tier2Healthy, desc: `1 per ${6} Tier-1 switches` },
{ label: 'Tier-3 (Core)', required: dc.networkHealth.tier3Required, healthy: dc.networkHealth.tier3Healthy, desc: 'Redundant pair' },
].map(tier => (
<div key={tier.label} className="flex items-center justify-between p-3 border border-surface-600 rounded-lg">
<div>
<div className="font-medium text-sm">{tier.label}</div>
<div className="text-xs text-surface-400">{tier.desc}</div>
</div>
<div className={`text-sm font-mono ${tier.healthy < tier.required ? 'text-danger' : 'text-green-400'}`}>
{tier.healthy} / {tier.required}
</div>
{/* Bandwidth gauge */}
<div className="p-3 border border-surface-600 rounded-lg">
<div className="flex items-center justify-between mb-1">
<span className="text-sm font-medium">Bandwidth</span>
{/* Test the most severe threshold first: any value below 0.5 is also below 0.8, so checking 0.8 first would make the danger colour unreachable. */}
<span className={`text-sm font-mono ${dc.networkSummary.averageBandwidth < 0.5 ? 'text-danger' : dc.networkSummary.averageBandwidth < 0.8 ? 'text-warning' : 'text-green-400'}`}>
{formatPercent(dc.networkSummary.averageBandwidth)}
</span>
</div>
))}
{dc.networkHealth.racksDisconnected > 0 && (
<div className="w-full bg-surface-900 rounded-full h-2">
<div
className={`h-2 rounded-full transition-all ${dc.networkSummary.averageBandwidth >= 0.8 ? 'bg-green-500' : dc.networkSummary.averageBandwidth >= 0.5 ? 'bg-yellow-500' : 'bg-red-500'}`}
style={{ width: `${dc.networkSummary.averageBandwidth * 100}%` }}
/>
</div>
<div className="flex justify-between mt-1 text-xs text-surface-500">
<span>Effective FLOPS: {formatPercent(dc.networkSummary.effectiveFlopsFraction)}</span>
<span>{dc.networkSummary.racksDegraded} degraded</span>
</div>
</div>
{/* Per-tier switch health */}
{(['tor', 't1', 't2', 't3'] as const).map(tier => {
const total = dc.networkSummary.totalByTier[tier] ?? 0;
if (total === 0) return null;
const healthy = dc.networkSummary.healthyByTier[tier] ?? 0;
const failed = total - healthy;
const config = SWITCH_TIER_CONFIGS[tier];
return (
<div key={tier} className="flex items-center justify-between p-3 border border-surface-600 rounded-lg">
<div>
<div className="font-medium text-sm">{config.name}</div>
<div className="text-xs text-surface-400">
{tier === 'tor' ? '1 per rack (embedded)' : `Fan-out ${config.fanOut}, ${config.uplinkCount} uplinks`}
</div>
</div>
<div className={`text-sm font-mono ${failed > 0 ? 'text-danger' : 'text-green-400'}`}>
{healthy} / {total}
</div>
</div>
);
})}
{dc.networkSummary.racksDisconnected > 0 && (
<div className="text-sm text-danger flex items-center gap-2 p-2">
<Activity size={14} /> {dc.networkHealth.racksDisconnected} compute racks disconnected due to network failures
<Activity size={14} /> {dc.networkSummary.racksDisconnected} compute racks disconnected due to network failures
</div>
)}
</div>