Rework network to 6-tier Clos topology with individual switch entities
CI / build-and-push (push) Successful in 31s
CI / build-and-push (push) Successful in 31s
Replace aggregate network health stats with a full 6-tier Clos topology (ToR → T1 → T2 → T3 → T4 → T5) in which every switch is an individually tracked entity with uplinks, repair pipelines, and failure cascades.

Key mechanics:
- Bottleneck bandwidth model (minimum along the path) affects FLOPS and satisfaction
- Rackdown on full disconnect → racks re-enter the testing pipeline on recovery
- Binomial failure sampling per tier, dirty-flag cascade optimization
- Flat switch registry for performance at scale
- Three new research nodes: network-redundancy, fast-repair, hot-standby

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import type { DCTier, DCTierConfig, RackSkuId, RackSkuConfig, NetworkTopologyConfig, CampusTierCost, ClusterCostConfig } from '../types/infrastructure';
|
||||
import type { DCTier, DCTierConfig, RackSkuId, RackSkuConfig, SwitchTier, SwitchTierConfig, CampusTierCost, ClusterCostConfig } from '../types/infrastructure';
|
||||
|
||||
/** Length of one tick in milliseconds (1000 ms = 1 s). Presumably the simulation step interval — confirm against the game loop. */
export const TICK_INTERVAL_MS = 1000;
|
||||
/**
 * Upper bound expressed in ticks. At TICK_INTERVAL_MS = 1000 this is 86,400 s, i.e. 24 hours.
 * NOTE(review): by its name this caps offline catch-up; where it is applied is not visible here.
 */
export const MAX_OFFLINE_TICKS = 86_400;
|
||||
@@ -123,19 +123,92 @@ export const CLUSTER_COST_CONFIG: ClusterCostConfig = {
|
||||
buildTimeTicks: 600,
|
||||
};
|
||||
|
||||
// --- Network Topology ---
|
||||
// --- Network Topology (6-Tier Clos) ---
|
||||
|
||||
export const NETWORK_TOPOLOGY: NetworkTopologyConfig = {
|
||||
tier1PerCompute: 24,
|
||||
tier2PerTier1: 6,
|
||||
tier3PerDC: 2,
|
||||
tier1FailureRate: 0.0001,
|
||||
tier2FailureRate: 0.00005,
|
||||
tier3FailureRate: 0.00002,
|
||||
tier1BlastRadius: 24,
|
||||
tier2BlastRadiusMultiplier: 6,
|
||||
/**
 * Static per-tier parameters for the switch fabric, keyed by tier id.
 *
 * Every entry repeats its own key in `tier` so a config can be passed around
 * without losing track of which tier it describes.
 *
 * Notes on the values:
 * - `tor` has `baseCost: 0`, `switchesPerNetworkRack: 0` and `powerDrawKW: 0`
 *   (presumably because it is bundled with the compute rack — confirm).
 * - `fanOut` is 0 for `t3`, `t4` and `t5`. `estimateNetworkSlots` takes the T3
 *   count from `T3_COUNT_PER_DC_TIER` instead of a fan-out; T4/T5 presumably
 *   follow the same pattern via their own count constants — confirm.
 * - `t5` has `uplinkCount: 0`; it is the last tier in the `SwitchTier` union.
 */
export const SWITCH_TIER_CONFIGS: Record<SwitchTier, SwitchTierConfig> = {
  tor: {
    tier: 'tor',
    name: 'Top-of-Rack',
    baseCost: 0,
    uplinkCount: 2,
    fanOut: 1,
    failureRatePerTick: 0.00005,
    repairBaseTicks: 15,
    switchesPerNetworkRack: 0,
    powerDrawKW: 0,
  },
  t1: {
    tier: 't1',
    name: 'Tier-1 Aggregation',
    baseCost: 8_000,
    uplinkCount: 2,
    fanOut: 24,
    failureRatePerTick: 0.0001,
    repairBaseTicks: 25,
    switchesPerNetworkRack: 4,
    powerDrawKW: 0.5,
  },
  t2: {
    tier: 't2',
    name: 'Tier-2 Spine',
    baseCost: 25_000,
    uplinkCount: 4,
    fanOut: 6,
    failureRatePerTick: 0.00008,
    repairBaseTicks: 40,
    switchesPerNetworkRack: 2,
    powerDrawKW: 1.0,
  },
  t3: {
    tier: 't3',
    name: 'Tier-3 Core',
    baseCost: 80_000,
    uplinkCount: 4,
    fanOut: 0,
    failureRatePerTick: 0.00004,
    repairBaseTicks: 60,
    switchesPerNetworkRack: 1,
    powerDrawKW: 2.0,
  },
  t4: {
    tier: 't4',
    name: 'Tier-4 Campus',
    baseCost: 200_000,
    uplinkCount: 4,
    fanOut: 0,
    failureRatePerTick: 0.00002,
    repairBaseTicks: 90,
    switchesPerNetworkRack: 0,
    powerDrawKW: 3.0,
  },
  t5: {
    tier: 't5',
    name: 'Tier-5 Cluster',
    baseCost: 500_000,
    uplinkCount: 0,
    fanOut: 0,
    failureRatePerTick: 0.00001,
    repairBaseTicks: 120,
    switchesPerNetworkRack: 0,
    powerDrawKW: 5.0,
  },
};
|
||||
|
||||
/** Cost of one network rack. NOTE(review): the currency unit, and whether this is charged in addition to each switch's baseCost, is not visible here — confirm. */
export const NETWORK_RACK_COST = 5_000;
|
||||
|
||||
/**
 * Number of T3 switches for a data center of each size tier.
 * Read by `estimateNetworkSlots`, which uses it in place of a fan-out ratio.
 */
export const T3_COUNT_PER_DC_TIER: Record<DCTier, number> = {
  small: 2,
  medium: 2,
  large: 3,
  mega: 4,
};
|
||||
|
||||
/**
 * Number of T4 switches per campus, keyed by DC size tier.
 * NOTE(review): presumably keyed by the tier of the DCs the campus holds — confirm.
 */
export const T4_COUNT_PER_CAMPUS: Record<DCTier, number> = {
  small: 2,
  medium: 2,
  large: 3,
  mega: 4,
};
|
||||
|
||||
/** Number of T5 switches per cluster; a single fixed value rather than a per-DC-tier table. */
export const T5_COUNT_PER_CLUSTER = 2;
|
||||
|
||||
/**
 * Tuning coefficients for network degradation.
 * NOTE(review): the formulas that consume these are not visible here; the
 * per-field descriptions below are inferred from the names — confirm.
 */
export const NETWORK_DEGRADATION = {
  /** Presumably scales lost bandwidth into a latency penalty. */
  bandwidthToLatencyPenalty: 0.3,
  /** Presumably the satisfaction lost per unit of latency penalty. */
  satisfactionPenaltyPerLatency: 0.05,
};
|
||||
|
||||
/** Fraction (0.3) used for switch repair cost. TODO confirm which cost it multiplies — likely SwitchTierConfig.baseCost. */
export const SWITCH_REPAIR_COST_FRACTION = 0.3;
|
||||
|
||||
/**
 * Estimates how many rack slots the switching fabric occupies for a given
 * number of compute racks.
 *
 * Only T1, T2 and T3 switches are counted:
 * - T1 count = ceil(computeRacks / t1.fanOut)
 * - T2 count = ceil(T1 count / t2.fanOut)
 * - T3 count = T3_COUNT_PER_DC_TIER[dcTier]
 *
 * Each count is then packed into network racks using that tier's
 * `switchesPerNetworkRack`, rounding up per tier (tiers do not share a rack).
 *
 * @param computeRacks Number of compute racks to serve.
 * @param dcTier Size tier of the data center; selects the T3 switch count.
 * @returns Number of network-rack slots; 0 when `computeRacks` is not a positive number.
 */
export function estimateNetworkSlots(computeRacks: number, dcTier: DCTier): number {
  // `!(x > 0)` rejects zero, negatives and NaN in one comparison, so NaN
  // cannot flow through Math.ceil into the result.
  if (!(computeRacks > 0)) return 0;

  const { t1, t2, t3 } = SWITCH_TIER_CONFIGS;

  const t1Count = Math.ceil(computeRacks / t1.fanOut);
  const t2Count = Math.ceil(t1Count / t2.fanOut);
  const t3Count = T3_COUNT_PER_DC_TIER[dcTier];

  const t1Racks = Math.ceil(t1Count / t1.switchesPerNetworkRack);
  const t2Racks = Math.ceil(t2Count / t2.switchesPerNetworkRack);
  const t3Racks = Math.ceil(t3Count / t3.switchesPerNetworkRack);

  return t1Racks + t2Racks + t3Racks;
}
|
||||
|
||||
/**
 * Finds the largest number of compute racks that fits in `totalSlots` once the
 * network racks they require (per `estimateNetworkSlots`) are also placed.
 *
 * Binary search over the candidate compute-rack count. It relies on
 * `count + estimateNetworkSlots(count, dcTier)` being non-decreasing in `count`.
 *
 * @param totalSlots Total rack slots available in the data center.
 * @param dcTier Size tier of the data center, forwarded to `estimateNetworkSlots`.
 * @returns The largest feasible compute-rack count; 0 when `totalSlots <= 2`.
 */
export function maxComputeRacks(totalSlots: number, dcTier: DCTier): number {
  if (totalSlots <= 2) return 0;

  let lo = 0;
  let hi = totalSlots;

  while (lo < hi) {
    // Upper-biased midpoint: because the feasible branch assigns `lo = mid`,
    // rounding down would stall when hi === lo + 1.
    const mid = Math.ceil((lo + hi) / 2);
    const fits = mid + estimateNetworkSlots(mid, dcTier) <= totalSlots;

    if (fits) {
      lo = mid;
    } else {
      hi = mid - 1;
    }
  }

  return lo;
}
|
||||
|
||||
// --- Rack SKU Configs ---
|
||||
|
||||
export const RACK_SKU_CONFIGS: Record<RackSkuId, RackSkuConfig> = {
|
||||
|
||||
@@ -58,4 +58,4 @@ export const INITIAL_SETTINGS: GameSettings = {
|
||||
sfxVolume: 0.7,
|
||||
};
|
||||
|
||||
/**
 * Save-format version, bumped from 3 to 4.
 * NOTE(review): presumably bumped because the persisted infrastructure state
 * changed shape (e.g. the switch registry); confirm a 3 → 4 migration exists.
 */
export const SAVE_VERSION = 4;
|
||||
|
||||
@@ -12,6 +12,7 @@ export interface Cluster {
|
||||
status: ClusterStatus;
|
||||
constructionProgress: number;
|
||||
constructionTotal: number;
|
||||
networkSummary: ClusterNetworkSummary;
|
||||
}
|
||||
|
||||
// --- Campus (holds same-tier DCs) ---
|
||||
@@ -37,6 +38,7 @@ export interface Campus {
|
||||
constructionProgress: number;
|
||||
constructionTotal: number;
|
||||
retrofitQueue: CampusRetrofitQueue | null;
|
||||
networkSummary: CampusNetworkSummary;
|
||||
}
|
||||
|
||||
// --- Data Center ---
|
||||
@@ -68,7 +70,7 @@ export interface DataCenter {
|
||||
rackSkuId: RackSkuId | null;
|
||||
computeRacksOnline: number;
|
||||
computeRacksFailed: number;
|
||||
networkHealth: NetworkHealthState;
|
||||
networkSummary: DCNetworkSummary;
|
||||
deploymentCohorts: DeploymentCohort[];
|
||||
retrofitState: RetrofitState | null;
|
||||
coolingLevel: number;
|
||||
@@ -81,50 +83,62 @@ export interface DataCenter {
|
||||
currentUptime: number;
|
||||
}
|
||||
|
||||
// --- Network Topology ---
|
||||
// --- Network Topology (6-Tier Clos) ---
|
||||
|
||||
export interface NetworkHealthState {
|
||||
tier1Required: number;
|
||||
tier1Healthy: number;
|
||||
tier2Required: number;
|
||||
tier2Healthy: number;
|
||||
tier3Required: number;
|
||||
tier3Healthy: number;
|
||||
/** Identifier of a switch tier: top-of-rack ('tor') followed by tiers 't1' through 't5'. */
export type SwitchTier = 'tor' | 't1' | 't2' | 't3' | 't4' | 't5';
|
||||
/** Operational state of a single switch. */
export type SwitchStatus = 'healthy' | 'failed' | 'repairing';
|
||||
|
||||
/**
 * One individually tracked switch in the fabric.
 *
 * NOTE(review): field meanings below are inferred from names and types; the
 * code that maintains them is not visible here.
 */
export interface NetworkSwitch {
  /** Unique id of this switch. */
  id: string;
  /** Tier this switch belongs to. */
  tier: SwitchTier;
  /** Current operational state. */
  status: SwitchStatus;

  /** Owning data center id, or null (presumably for switches above DC scope — confirm). */
  dcId: string | null;
  /** Owning campus id, or null. */
  campusId: string | null;
  /** Owning cluster id, or null. */
  clusterId: string | null;

  /** Ids of the switches this one connects up to. */
  uplinkIds: string[];
  /** Ids of the switches (or other entities — confirm) connected below this one. */
  downlinkIds: string[];

  /** Count of uplinks currently usable. */
  activeUplinks: number;
  /** Count of uplinks this switch has in total. */
  totalUplinks: number;
  /** Bandwidth currently available through this switch; presumably a 0..1 fraction — confirm. */
  effectiveBandwidth: number;

  /** Repair progress so far; presumably in ticks, counted toward `repairTotal` — confirm. */
  repairProgress: number;
  /** Amount of repair work required for the current repair. */
  repairTotal: number;
}
|
||||
|
||||
/** Static, per-tier parameters for switches of one tier. */
export interface SwitchTierConfig {
  /** Tier these parameters describe. */
  tier: SwitchTier;
  /** Human-readable tier name. */
  name: string;
  /** Base cost of one switch of this tier. */
  baseCost: number;
  /** Number of uplinks per switch of this tier. */
  uplinkCount: number;
  /** Number of lower-tier units served by one switch of this tier. */
  fanOut: number;
  /** Per-tick failure rate; presumably a probability per switch — confirm. */
  failureRatePerTick: number;
  /** Base repair duration in ticks. */
  repairBaseTicks: number;
  /** How many switches of this tier fit in one network rack. */
  switchesPerNetworkRack: number;
  /** Power draw of one switch, in kilowatts. */
  powerDrawKW: number;
}
|
||||
|
||||
/**
 * Aggregated network state for one data center.
 *
 * NOTE(review): how and when these aggregates are recomputed is not visible
 * here; descriptions are inferred from names and types.
 */
export interface DCNetworkSummary {
  /** Ids of the switches that belong to this data center. */
  switchIds: string[];
  /** Number of racks occupied by network switches. */
  networkRackCount: number;
  /** Switch count per tier; tiers with no entry are absent. */
  totalByTier: Partial<Record<SwitchTier, number>>;
  /** Healthy switch count per tier; tiers with no entry are absent. */
  healthyByTier: Partial<Record<SwitchTier, number>>;
  /** Number of racks with no network connectivity. */
  racksDisconnected: number;
  /** Number of racks with reduced connectivity. */
  racksDegraded: number;
  /** Average bandwidth across racks; presumably a 0..1 fraction — confirm. */
  averageBandwidth: number;
  /** Fraction of FLOPS that remains usable given network state. */
  effectiveFlopsFraction: number;
}
|
||||
|
||||
export interface NetworkTopologyConfig {
|
||||
tier1PerCompute: number;
|
||||
tier2PerTier1: number;
|
||||
tier3PerDC: number;
|
||||
tier1FailureRate: number;
|
||||
tier2FailureRate: number;
|
||||
tier3FailureRate: number;
|
||||
tier1BlastRadius: number;
|
||||
tier2BlastRadiusMultiplier: number;
|
||||
/** Aggregated campus-level (T4) network state. */
export interface CampusNetworkSummary {
  /** Ids of the switches that belong to this campus. */
  switchIds: string[];
  /** Total number of T4 switches. */
  totalT4: number;
  /** Number of T4 switches currently healthy. */
  healthyT4: number;
  /** Bandwidth available between data centers; presumably a 0..1 fraction — confirm. */
  crossDCBandwidth: number;
}
|
||||
|
||||
/**
 * Number of network slots needed for `computeRacks` compute racks under the
 * fixed three-tier ratios: one tier-1 unit per 24 compute racks, one tier-2
 * unit per 6 tier-1 units, and a constant 2 tier-3 units.
 *
 * @param computeRacks Number of compute racks to serve.
 * @returns tier1 + tier2 + tier3 slot count; 0 when `computeRacks` is not a positive number.
 */
export function networkSlotsRequired(computeRacks: number): number {
  const computePerTier1 = 24;
  const tier1PerTier2 = 6;
  const tier3Fixed = 2;

  // `!(x > 0)` rejects zero, negatives and NaN in one comparison, so NaN
  // cannot flow through Math.ceil into the result.
  if (!(computeRacks > 0)) return 0;

  const tier1 = Math.ceil(computeRacks / computePerTier1);
  const tier2 = Math.ceil(tier1 / tier1PerTier2);

  return tier1 + tier2 + tier3Fixed;
}
|
||||
|
||||
export function maxComputeRacks(totalSlots: number): number {
|
||||
if (totalSlots <= 2) return 0;
|
||||
let lo = 0;
|
||||
let hi = totalSlots;
|
||||
while (lo < hi) {
|
||||
const mid = Math.ceil((lo + hi) / 2);
|
||||
if (mid + networkSlotsRequired(mid) <= totalSlots) {
|
||||
lo = mid;
|
||||
} else {
|
||||
hi = mid - 1;
|
||||
}
|
||||
}
|
||||
return lo;
|
||||
/** Aggregated cluster-level (T5) network state. */
export interface ClusterNetworkSummary {
  /** Ids of the switches that belong to this cluster. */
  switchIds: string[];
  /** Total number of T5 switches. */
  totalT5: number;
  /** Number of T5 switches currently healthy. */
  healthyT5: number;
  /** Bandwidth available between campuses; presumably a 0..1 fraction — confirm. */
  crossCampusBandwidth: number;
}
|
||||
|
||||
// --- Racks ---
|
||||
@@ -137,7 +151,7 @@ export type RackSkuId =
|
||||
|
||||
/**
 * Stage a rack can occupy in the deployment pipeline.
 *
 * NOTE(review): 'network-down' is presumably the stage for racks that lost all
 * network connectivity — confirm against the pipeline code.
 */
export type PipelineStage =
  | 'ordered'
  | 'manufacturing'
  | 'receiving'
  | 'installation'
  | 'testing'
  | 'repair'
  | 'network-down'
  | 'decommission';
|
||||
|
||||
export interface PipelineTimings {
|
||||
manufacturing: number;
|
||||
@@ -202,20 +216,24 @@ export interface ClusterCostConfig {
|
||||
|
||||
/**
 * Root of the infrastructure state: the cluster hierarchy, a flat switch
 * registry, and top-level aggregate totals.
 */
export interface InfrastructureState {
  /** All clusters. */
  clusters: Cluster[];
  /** Flat lookup of every switch; presumably keyed by `NetworkSwitch.id` — confirm. */
  switchRegistry: Record<string, NetworkSwitch>;
  /** Aggregate FLOPS across the infrastructure. */
  totalFlops: number;
  /** Aggregate uptime; the initial state uses 1, so presumably a 0..1 fraction — confirm. */
  totalUptime: number;
  /** Total number of racks. */
  totalRackCount: number;
  /** Total number of compute racks. */
  totalComputeRackCount: number;
  /** Total number of data centers. */
  totalDataCenterCount: number;
  /** Latency penalty attributed to the network; 0 in the initial state. */
  networkLatencyPenalty: number;
}
|
||||
|
||||
/**
 * Starting infrastructure state: no clusters, an empty switch registry, and
 * zeroed totals. `totalUptime` starts at 1 rather than 0.
 */
export const INITIAL_INFRASTRUCTURE: InfrastructureState = {
  clusters: [],
  switchRegistry: {},
  totalFlops: 0,
  totalUptime: 1,
  totalRackCount: 0,
  totalComputeRackCount: 0,
  totalDataCenterCount: 0,
  networkLatencyPenalty: 0,
};
|
||||
|
||||
// --- Locations ---
|
||||
|
||||
Reference in New Issue
Block a user