Rework network to 6-tier Clos topology with individual switch entities
CI / build-and-push (push) Successful in 31s
CI / build-and-push (push) Successful in 31s
Replace aggregate network health stats with a full 6-tier Clos topology (ToR → T1 → T2 → T3 → T4 → T5) where every switch is an individually tracked entity with uplinks, repair pipelines, and failure cascades.

Key mechanics:
- Bottleneck bandwidth model (min along path) affects FLOPS and satisfaction
- Rackdown on full disconnect → racks re-enter testing pipeline on recovery
- Binomial failure sampling per tier, dirty-flag cascade optimization
- Flat switch registry for performance at scale
- Three new research nodes: network-redundancy, fast-repair, hot-standby

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -14,7 +14,8 @@ import {
|
||||
formatMoney, formatNumber, formatPercent,
|
||||
LOCATION_CONFIGS, DC_TIER_CONFIGS, RACK_SKU_CONFIGS,
|
||||
CAMPUS_TIER_COSTS, CLUSTER_COST_CONFIG, FIRST_CAMPUS_BUILD_TICKS,
|
||||
networkSlotsRequired, maxComputeRacks,
|
||||
estimateNetworkSlots, maxComputeRacks,
|
||||
SWITCH_TIER_CONFIGS,
|
||||
DC_UPGRADE_COST_FRACTION, DC_UPGRADE_INCREMENT,
|
||||
} from '@ai-tycoon/shared';
|
||||
import type {
|
||||
@@ -26,12 +27,14 @@ const ERA_ORDER: Era[] = ['startup', 'scaleup', 'bigtech', 'agi'];
|
||||
|
||||
const STAGE_LABELS: Record<PipelineStage, string> = {
|
||||
ordered: 'Ordered', manufacturing: 'Mfg', receiving: 'Recv',
|
||||
installation: 'Install', testing: 'Testing', repair: 'Repair', decommission: 'Decom',
|
||||
installation: 'Install', testing: 'Testing', repair: 'Repair',
|
||||
'network-down': 'Net Down', decommission: 'Decom',
|
||||
};
|
||||
|
||||
const STAGE_COLORS: Record<PipelineStage, string> = {
|
||||
ordered: 'bg-surface-600', manufacturing: 'bg-blue-500', receiving: 'bg-cyan-500',
|
||||
installation: 'bg-violet-500', testing: 'bg-amber-500', repair: 'bg-danger', decommission: 'bg-surface-500',
|
||||
installation: 'bg-violet-500', testing: 'bg-amber-500', repair: 'bg-danger',
|
||||
'network-down': 'bg-red-600', decommission: 'bg-surface-500',
|
||||
};
|
||||
|
||||
// ─── Shared Components ──────────────────────────────────────────
|
||||
@@ -112,7 +115,7 @@ function Breadcrumb({ nav }: { nav: InfraNav }) {
|
||||
|
||||
function DeploymentProgressBar({ dc }: { dc: DataCenter }) {
|
||||
const tierConfig = DC_TIER_CONFIGS[dc.tier];
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots);
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
|
||||
const pipelineRacks = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((s, c) => s + c.count, 0);
|
||||
const totalTarget = dc.computeRacksOnline + pipelineRacks;
|
||||
const pct = totalTarget > 0 ? (dc.computeRacksOnline / totalTarget) * 100 : 0;
|
||||
@@ -157,23 +160,27 @@ function CohortStageBreakdown({ cohorts }: { cohorts: DeploymentCohort[] }) {
|
||||
}
|
||||
|
||||
function NetworkHealthIndicator({ dc }: { dc: DataCenter }) {
|
||||
const nh = dc.networkHealth;
|
||||
if (nh.tier1Required === 0) return null;
|
||||
const ns = dc.networkSummary;
|
||||
if (ns.switchIds.length === 0) return null;
|
||||
|
||||
const allHealthy = nh.tier1Healthy === nh.tier1Required
|
||||
&& nh.tier2Healthy === nh.tier2Required
|
||||
&& nh.tier3Healthy === nh.tier3Required;
|
||||
const hasDisconnected = ns.racksDisconnected > 0;
|
||||
const hasDegraded = ns.racksDegraded > 0;
|
||||
const coreDown = (ns.healthyByTier?.t3 ?? 0) < (ns.totalByTier?.t3 ?? 0);
|
||||
|
||||
const color = nh.tier3Healthy < nh.tier3Required ? 'text-danger'
|
||||
: !allHealthy ? 'text-amber-400'
|
||||
const color = coreDown ? 'text-danger'
|
||||
: hasDisconnected ? 'text-danger'
|
||||
: hasDegraded ? 'text-amber-400'
|
||||
: 'text-green-400';
|
||||
|
||||
const bwPct = Math.round(ns.averageBandwidth * 100);
|
||||
|
||||
return (
|
||||
<div className={`flex items-center gap-1 text-xs ${color}`}>
|
||||
<Network size={12} />
|
||||
<span>
|
||||
{nh.tier3Healthy < nh.tier3Required ? 'Core Down'
|
||||
: !allHealthy ? `${nh.racksDisconnected} disconnected`
|
||||
{coreDown ? 'Core Down'
|
||||
: hasDisconnected ? `${ns.racksDisconnected} disconnected`
|
||||
: hasDegraded ? `${bwPct}% bandwidth`
|
||||
: 'Healthy'}
|
||||
</span>
|
||||
</div>
|
||||
@@ -661,7 +668,7 @@ function FillAllDCsModal({ campus, money, era, research, onConfirm, onClose }: {
|
||||
const { qty, cost } = computeFillForDC(dc, selectedSku, remaining);
|
||||
if (qty === 0) {
|
||||
const tierConfig = DC_TIER_CONFIGS[dc.tier];
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots);
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
|
||||
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0);
|
||||
const isFull = maxCompute - (dc.computeRacksOnline + pipelineCount) <= 0;
|
||||
return { dc, qty: 0, cost: 0, reason: isFull ? 'Already full' : 'No budget' };
|
||||
@@ -929,7 +936,7 @@ function CampusDetailView({ clusterId, campusId }: { clusterId: string; campusId
|
||||
const hasRetrofitQueue = !!campus.retrofitQueue;
|
||||
|
||||
const fillableDCs = operationalDCs.filter(dc => {
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots);
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
|
||||
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0);
|
||||
return maxCompute - (dc.computeRacksOnline + pipelineCount) > 0;
|
||||
});
|
||||
@@ -1124,12 +1131,12 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: {
|
||||
if (!dc || !cluster) return <div className="text-surface-400">Data center not found.</div>;
|
||||
|
||||
const tierConfig = DC_TIER_CONFIGS[dc.tier];
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots);
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
|
||||
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((s, c) => s + c.count, 0);
|
||||
const existingCompute = dc.computeRacksOnline + pipelineCount;
|
||||
const availableSlots = maxCompute - existingCompute;
|
||||
const sku = dc.rackSkuId ? RACK_SKU_CONFIGS[dc.rackSkuId] : null;
|
||||
const netSlots = networkSlotsRequired(existingCompute);
|
||||
const netSlots = estimateNetworkSlots(existingCompute, dc.tier);
|
||||
|
||||
const availableSkus = Object.values(RACK_SKU_CONFIGS).filter(s => {
|
||||
if (ERA_ORDER.indexOf(era) < ERA_ORDER.indexOf(s.era)) return false;
|
||||
@@ -1257,7 +1264,7 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: {
|
||||
{(() => {
|
||||
const skuToUse = dc.rackSkuId ?? selectedSku!;
|
||||
const skuConfig = RACK_SKU_CONFIGS[skuToUse];
|
||||
const newNetSlots = networkSlotsRequired(existingCompute + deployQty);
|
||||
const newNetSlots = estimateNetworkSlots(existingCompute + deployQty, dc.tier);
|
||||
const addedNet = newNetSlots - netSlots;
|
||||
const totalCost = skuConfig.baseCost * deployQty;
|
||||
return (
|
||||
@@ -1358,24 +1365,51 @@ function DataCenterDetailView({ clusterId, campusId, datacenterId }: {
|
||||
<p className="text-sm text-surface-400">No racks online. Deploy racks to see network topology.</p>
|
||||
) : (
|
||||
<div className="space-y-3">
|
||||
{[
|
||||
{ label: 'Tier-1 (ToR)', required: dc.networkHealth.tier1Required, healthy: dc.networkHealth.tier1Healthy, desc: `1 per ${24} compute racks` },
|
||||
{ label: 'Tier-2 (Aggr)', required: dc.networkHealth.tier2Required, healthy: dc.networkHealth.tier2Healthy, desc: `1 per ${6} Tier-1 switches` },
|
||||
{ label: 'Tier-3 (Core)', required: dc.networkHealth.tier3Required, healthy: dc.networkHealth.tier3Healthy, desc: 'Redundant pair' },
|
||||
].map(tier => (
|
||||
<div key={tier.label} className="flex items-center justify-between p-3 border border-surface-600 rounded-lg">
|
||||
<div>
|
||||
<div className="font-medium text-sm">{tier.label}</div>
|
||||
<div className="text-xs text-surface-400">{tier.desc}</div>
|
||||
</div>
|
||||
<div className={`text-sm font-mono ${tier.healthy < tier.required ? 'text-danger' : 'text-green-400'}`}>
|
||||
{tier.healthy} / {tier.required}
|
||||
</div>
|
||||
{/* Bandwidth gauge */}
|
||||
<div className="p-3 border border-surface-600 rounded-lg">
|
||||
<div className="flex items-center justify-between mb-1">
|
||||
<span className="text-sm font-medium">Bandwidth</span>
|
||||
<span className={`text-sm font-mono ${dc.networkSummary.averageBandwidth < 0.8 ? 'text-warning' : dc.networkSummary.averageBandwidth < 0.5 ? 'text-danger' : 'text-green-400'}`}>
|
||||
{formatPercent(dc.networkSummary.averageBandwidth)}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
{dc.networkHealth.racksDisconnected > 0 && (
|
||||
<div className="w-full bg-surface-900 rounded-full h-2">
|
||||
<div
|
||||
className={`h-2 rounded-full transition-all ${dc.networkSummary.averageBandwidth >= 0.8 ? 'bg-green-500' : dc.networkSummary.averageBandwidth >= 0.5 ? 'bg-yellow-500' : 'bg-red-500'}`}
|
||||
style={{ width: `${dc.networkSummary.averageBandwidth * 100}%` }}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex justify-between mt-1 text-xs text-surface-500">
|
||||
<span>Effective FLOPS: {formatPercent(dc.networkSummary.effectiveFlopsFraction)}</span>
|
||||
<span>{dc.networkSummary.racksDegraded} degraded</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Per-tier switch health */}
|
||||
{(['tor', 't1', 't2', 't3'] as const).map(tier => {
|
||||
const total = dc.networkSummary.totalByTier[tier] ?? 0;
|
||||
if (total === 0) return null;
|
||||
const healthy = dc.networkSummary.healthyByTier[tier] ?? 0;
|
||||
const failed = total - healthy;
|
||||
const config = SWITCH_TIER_CONFIGS[tier];
|
||||
return (
|
||||
<div key={tier} className="flex items-center justify-between p-3 border border-surface-600 rounded-lg">
|
||||
<div>
|
||||
<div className="font-medium text-sm">{config.name}</div>
|
||||
<div className="text-xs text-surface-400">
|
||||
{tier === 'tor' ? '1 per rack (embedded)' : `Fan-out ${config.fanOut}, ${config.uplinkCount} uplinks`}
|
||||
</div>
|
||||
</div>
|
||||
<div className={`text-sm font-mono ${failed > 0 ? 'text-danger' : 'text-green-400'}`}>
|
||||
{healthy} / {total}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
|
||||
{dc.networkSummary.racksDisconnected > 0 && (
|
||||
<div className="text-sm text-danger flex items-center gap-2 p-2">
|
||||
<Activity size={14} /> {dc.networkHealth.racksDisconnected} compute racks disconnected due to network failures
|
||||
<Activity size={14} /> {dc.networkSummary.racksDisconnected} compute racks disconnected due to network failures
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
+21
-22
@@ -9,7 +9,7 @@ import type {
|
||||
Cluster, Campus, DataCenter, DCTier, RackSkuId, TrainingJob,
|
||||
ActiveResearch, OwnedDataset, LocationId,
|
||||
DeploymentCohort, PipelineStage,
|
||||
NetworkHealthState, CampusRetrofitQueue,
|
||||
CampusRetrofitQueue,
|
||||
} from '@ai-tycoon/shared';
|
||||
import type { FundingRoundType, OverloadPolicy, TuningPreset, ModelTuning } from '@ai-tycoon/shared';
|
||||
import {
|
||||
@@ -25,9 +25,12 @@ import {
|
||||
FUNDING_ROUNDS,
|
||||
OPEN_SOURCE_REPUTATION_BOOST,
|
||||
LOCATION_CONFIGS,
|
||||
networkSlotsRequired, maxComputeRacks,
|
||||
estimateNetworkSlots, maxComputeRacks,
|
||||
uuid,
|
||||
} from '@ai-tycoon/shared';
|
||||
import {
|
||||
emptyDCNetworkSummary, emptyCampusNetworkSummary, emptyClusterNetworkSummary,
|
||||
} from '@ai-tycoon/game-engine';
|
||||
import { INITIAL_RIVALS } from '@ai-tycoon/game-engine';
|
||||
|
||||
export type ActivePage = 'dashboard' | 'infrastructure' | 'research' | 'models'
|
||||
@@ -57,8 +60,14 @@ export interface GameNotification {
|
||||
read: boolean;
|
||||
}
|
||||
|
||||
function emptyNetworkHealth(): NetworkHealthState {
|
||||
return { tier1Required: 0, tier1Healthy: 0, tier2Required: 0, tier2Healthy: 0, tier3Required: 0, tier3Healthy: 0, racksDisconnected: 0 };
|
||||
function emptyDC(): Pick<DataCenter, 'networkSummary' | 'effectiveComputeRacks' | 'usedSlots' | 'usedPowerKW' | 'energyCostPerTick' | 'maintenanceCostPerTick' | 'currentUptime'> {
|
||||
return {
|
||||
networkSummary: emptyDCNetworkSummary(),
|
||||
effectiveComputeRacks: 0,
|
||||
usedSlots: 0, usedPowerKW: 0,
|
||||
energyCostPerTick: 0, maintenanceCostPerTick: 0,
|
||||
currentUptime: 1,
|
||||
};
|
||||
}
|
||||
|
||||
interface Actions {
|
||||
@@ -189,7 +198,7 @@ export function computeFillForDC(
|
||||
|
||||
const sku = RACK_SKU_CONFIGS[skuId];
|
||||
const tierConfig = DC_TIER_CONFIGS[dc.tier];
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots);
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
|
||||
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0);
|
||||
const existingCompute = dc.computeRacksOnline + pipelineCount;
|
||||
const available = maxCompute - existingCompute;
|
||||
@@ -319,6 +328,7 @@ export const useGameStore = create<Store>()(
|
||||
status: isFirst ? 'operational' : 'constructing',
|
||||
constructionProgress: isFirst ? 0 : 0,
|
||||
constructionTotal: isFirst ? 0 : CLUSTER_COST_CONFIG.buildTimeTicks,
|
||||
networkSummary: emptyClusterNetworkSummary(),
|
||||
};
|
||||
|
||||
return {
|
||||
@@ -358,6 +368,7 @@ export const useGameStore = create<Store>()(
|
||||
constructionProgress: 0,
|
||||
constructionTotal: buildTime,
|
||||
retrofitQueue: null,
|
||||
networkSummary: emptyCampusNetworkSummary(),
|
||||
};
|
||||
|
||||
return {
|
||||
@@ -398,17 +409,11 @@ export const useGameStore = create<Store>()(
|
||||
rackSkuId: null,
|
||||
computeRacksOnline: 0,
|
||||
computeRacksFailed: 0,
|
||||
networkHealth: emptyNetworkHealth(),
|
||||
...emptyDC(),
|
||||
deploymentCohorts: [],
|
||||
retrofitState: null,
|
||||
coolingLevel: 0,
|
||||
redundancyLevel: 0,
|
||||
effectiveComputeRacks: 0,
|
||||
usedSlots: 0,
|
||||
usedPowerKW: 0,
|
||||
energyCostPerTick: 0,
|
||||
maintenanceCostPerTick: 0,
|
||||
currentUptime: 1,
|
||||
};
|
||||
|
||||
return {
|
||||
@@ -437,14 +442,14 @@ export const useGameStore = create<Store>()(
|
||||
if (sku.requiredResearch && !s.research.completedResearch.includes(sku.requiredResearch)) return s;
|
||||
|
||||
const tierConfig = DC_TIER_CONFIGS[dc.tier];
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots);
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
|
||||
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0);
|
||||
const existingCompute = dc.computeRacksOnline + pipelineCount;
|
||||
const available = maxCompute - existingCompute;
|
||||
const actualQty = Math.min(quantity, available);
|
||||
if (actualQty <= 0) return s;
|
||||
|
||||
const totalNetSlots = networkSlotsRequired(existingCompute + actualQty);
|
||||
const totalNetSlots = estimateNetworkSlots(existingCompute + actualQty, dc.tier);
|
||||
const totalSlotsNeeded = existingCompute + actualQty + totalNetSlots;
|
||||
if (totalSlotsNeeded > tierConfig.rackSlots) return s;
|
||||
|
||||
@@ -484,7 +489,7 @@ export const useGameStore = create<Store>()(
|
||||
|
||||
const dc = found.dc;
|
||||
const tierConfig = DC_TIER_CONFIGS[dc.tier];
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots);
|
||||
const maxCompute = maxComputeRacks(tierConfig.rackSlots, dc.tier);
|
||||
const pipelineCount = dc.deploymentCohorts.filter(c => c.stage !== 'decommission').reduce((sum, c) => sum + c.count, 0);
|
||||
const existingCompute = dc.computeRacksOnline + pipelineCount;
|
||||
const available = maxCompute - existingCompute;
|
||||
@@ -522,17 +527,11 @@ export const useGameStore = create<Store>()(
|
||||
rackSkuId: null,
|
||||
computeRacksOnline: 0,
|
||||
computeRacksFailed: 0,
|
||||
networkHealth: emptyNetworkHealth(),
|
||||
...emptyDC(),
|
||||
deploymentCohorts: [],
|
||||
retrofitState: null,
|
||||
coolingLevel: 0,
|
||||
redundancyLevel: 0,
|
||||
effectiveComputeRacks: 0,
|
||||
usedSlots: 0,
|
||||
usedPowerKW: 0,
|
||||
energyCostPerTick: 0,
|
||||
maintenanceCostPerTick: 0,
|
||||
currentUptime: 1,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user