Redesign infrastructure to hypercluster scale with 4-level hierarchy
CI / build-and-push (push) Successful in 43s
CI / build-and-push (push) Successful in 43s
Replace flat DataCenter/Rack model with Cluster > Campus > Data Center > Racks hierarchy. Individual rack entities eliminated in favor of statistical batch simulation using deployment cohorts. Adds tiered network topology (ToR/agg/core) with proportional outage model, DC retrofitting, bulk operations, and drill-down UI navigation with breadcrumbs. First cluster and campus are free to preserve early game flow. Rebalances starting economy ($600K), funding rounds, and cohort scaling for hypercluster-scale gameplay. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,9 +10,14 @@ export function processEconomy(
|
||||
): EconomyState {
|
||||
const revenue = market.apiRevenue + market.subscriptionRevenue;
|
||||
|
||||
const infraExpenses = infrastructure.dataCenters.reduce((sum, dc) => {
|
||||
return sum + dc.energyCostPerTick + dc.maintenanceCostPerTick;
|
||||
}, 0);
|
||||
let infraExpenses = 0;
|
||||
for (const cluster of infrastructure.clusters) {
|
||||
for (const campus of cluster.campuses) {
|
||||
for (const dc of campus.dataCenters) {
|
||||
infraExpenses += dc.energyCostPerTick + dc.maintenanceCostPerTick;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const talentExpenses = state.talent.totalSalaryPerTick;
|
||||
const dataExpenses = state.data.partnerships.reduce((sum, p) => sum + p.costPerTick, 0);
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
import type { GameState, InfrastructureState, DataCenter, RackOrder, Rack, PipelineStage } from '@ai-tycoon/shared';
|
||||
import type {
|
||||
GameState, InfrastructureState, Cluster, Campus, DataCenter,
|
||||
DeploymentCohort, NetworkHealthState, PipelineStage,
|
||||
} from '@ai-tycoon/shared';
|
||||
import {
|
||||
LOCATION_CONFIGS,
|
||||
RACK_SKU_CONFIGS,
|
||||
@@ -8,6 +11,10 @@ import {
|
||||
COOLING_FAILURE_REDUCTION,
|
||||
REDUNDANCY_FAILURE_REDUCTION,
|
||||
RACK_REPAIR_BASE_TICKS,
|
||||
NETWORK_TOPOLOGY,
|
||||
COHORT_SCALE_FACTOR,
|
||||
PIPELINE_ORDER_BASE_TICKS,
|
||||
networkSlotsRequired,
|
||||
} from '@ai-tycoon/shared';
|
||||
import type { TickNotification } from '../tick';
|
||||
|
||||
@@ -27,18 +34,21 @@ function nextStage(stage: PipelineStage): PipelineStage | 'production' {
|
||||
return PIPELINE_ADVANCE_ORDER[idx + 1];
|
||||
}
|
||||
|
||||
/**
 * Computes how many ticks a deployment cohort needs to finish a pipeline stage.
 *
 * The per-stage base duration comes from the SKU's `pipelineTimeTicks`, except:
 *  - `ordered` uses `PIPELINE_ORDER_BASE_TICKS`,
 *  - `repair` uses `RACK_REPAIR_BASE_TICKS`,
 *  - `decommission` reuses the SKU's installation time,
 *  - any other stage has a base of 0.
 *
 * The base is then scaled linearly with cohort size:
 * `ceil(base * (1 + COHORT_SCALE_FACTOR * count))`.
 *
 * @param stage - Pipeline stage whose duration is requested.
 * @param skuId - Key into `RACK_SKU_CONFIGS`.
 * @param count - Number of racks in the cohort; negative values are treated as 0.
 * @returns Whole number of ticks for the stage.
 * @throws TypeError if `skuId` is not a key of `RACK_SKU_CONFIGS`.
 */
function cohortStageTotal(stage: PipelineStage, skuId: string, count: number): number {
  const sku = RACK_SKU_CONFIGS[skuId as keyof typeof RACK_SKU_CONFIGS];
  if (sku === undefined) {
    // The cast above silences the compiler for arbitrary strings; fail with a
    // readable message instead of "cannot read properties of undefined".
    throw new TypeError(`Unknown rack SKU: ${skuId}`);
  }
  const timings = sku.pipelineTimeTicks;

  let base: number;
  switch (stage) {
    case 'ordered': base = PIPELINE_ORDER_BASE_TICKS; break;
    case 'manufacturing': base = timings.manufacturing; break;
    case 'receiving': base = timings.receiving; break;
    case 'installation': base = timings.installation; break;
    case 'testing': base = timings.testing; break;
    case 'repair': base = RACK_REPAIR_BASE_TICKS; break;
    case 'decommission': base = timings.installation; break;
    default: base = 0;
  }

  // A negative count would shrink (or even negate) the duration; clamp it.
  const cohortSize = Math.max(0, count);
  return Math.ceil(base * (1 + COHORT_SCALE_FACTOR * cohortSize));
}
|
||||
|
||||
function stageSpeed(stage: PipelineStage, engEff: number, opsEff: number): number {
|
||||
@@ -52,235 +62,403 @@ function stageSpeed(stage: PipelineStage, engEff: number, opsEff: number): numbe
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Cheap stand-in for a Binomial(n, p) draw used by the batch simulation.
 *
 * This is NOT a true binomial sample: it returns the expected value `n * p`
 * stochastically rounded to a neighbouring integer. The integer part is always
 * returned, and the fractional part is the probability of adding one more.
 * The mean matches a binomial draw, but the result never deviates from the
 * mean by a whole unit or more.
 *
 * @param n - Number of trials (e.g. racks at risk).
 * @param p - Per-trial probability.
 * @returns 0 when `n` or `p` is non-positive or NaN, `n` when `p >= 1`,
 *          otherwise `floor(n * p)` or `floor(n * p) + 1`.
 */
function binomialSample(n: number, p: number): number {
  // `!(x > 0)` also catches NaN, which would otherwise propagate into rack counts.
  if (!(n > 0) || !(p > 0)) return 0;
  if (p >= 1) return n;

  const expected = n * p;
  const whole = Math.floor(expected);
  const remainder = expected - whole;

  // Round up with probability equal to the fractional part of the mean.
  return Math.random() < remainder ? whole + 1 : whole;
}
|
||||
|
||||
/**
 * Builds the fully-healthy network state required to serve a given number of
 * online compute racks.
 *
 * Tier sizing, as computed here:
 *  - tier 1: `ceil(computeRacksOnline / NETWORK_TOPOLOGY.tier1PerCompute)`
 *  - tier 2: `ceil(tier1 / NETWORK_TOPOLOGY.tier2PerTier1)`
 *  - tier 3: the fixed `NETWORK_TOPOLOGY.tier3PerDC`
 *
 * Every tier is reported as fully healthy with no racks disconnected; failures
 * are applied separately.
 *
 * @param computeRacksOnline - Compute racks currently online in the data center.
 * @returns An all-zero state when there are no racks online (or the count is
 *          NaN), otherwise the required switch counts with healthy == required.
 */
function computeNetworkHealth(computeRacksOnline: number): NetworkHealthState {
  // `!(x > 0)` treats NaN like "no racks" instead of emitting NaN tier counts.
  if (!(computeRacksOnline > 0)) {
    return {
      tier1Required: 0,
      tier1Healthy: 0,
      tier2Required: 0,
      tier2Healthy: 0,
      tier3Required: 0,
      tier3Healthy: 0,
      racksDisconnected: 0,
    };
  }

  const tier1 = Math.ceil(computeRacksOnline / NETWORK_TOPOLOGY.tier1PerCompute);
  const tier2 = Math.ceil(tier1 / NETWORK_TOPOLOGY.tier2PerTier1);
  const tier3 = NETWORK_TOPOLOGY.tier3PerDC;

  return {
    tier1Required: tier1,
    tier1Healthy: tier1,
    tier2Required: tier2,
    tier2Healthy: tier2,
    tier3Required: tier3,
    tier3Healthy: tier3,
    racksDisconnected: 0,
  };
}
|
||||
|
||||
/**
 * Rolls switch failures for each network tier of one data center and derives
 * how many compute racks lose connectivity as a result.
 *
 * Per tier, the failure count is `binomialSample(required, rate)` where
 * `rate = NETWORK_TOPOLOGY.tierNFailureRate * (1 - networkResearchBonus)`.
 * Disconnected racks accumulate as:
 *  - tier 1: `failures * tier1BlastRadius`
 *  - tier 2: `failures * tier1BlastRadius * tier2BlastRadiusMultiplier`
 *  - tier 3: any failure disconnects every online rack
 * and the total is capped at `computeRacksOnline`.
 *
 * @param nh - Baseline network state; only the `tierNRequired` fields are read.
 * @param computeRacksOnline - Compute racks currently online in the data center.
 * @param networkResearchBonus - Fractional reduction applied to every tier's failure rate.
 * @returns The updated network state and the number of disconnected racks
 *          (the same value is stored in `networkHealth.racksDisconnected`).
 */
function processNetworkFailures(
  nh: NetworkHealthState,
  computeRacksOnline: number,
  networkResearchBonus: number,
): { networkHealth: NetworkHealthState; racksDisconnected: number } {
  if (!(computeRacksOnline > 0)) {
    // Nothing online means nothing can be disconnected. Reset the field so the
    // returned state never disagrees with the returned count.
    return { networkHealth: { ...nh, racksDisconnected: 0 }, racksDisconnected: 0 };
  }

  const rateScale = 1 - networkResearchBonus;

  // Draw order (tier 1, tier 2, tier 3) is kept fixed so random consumption is stable.
  const t1Failures = binomialSample(nh.tier1Required, NETWORK_TOPOLOGY.tier1FailureRate * rateScale);
  const t2Failures = binomialSample(nh.tier2Required, NETWORK_TOPOLOGY.tier2FailureRate * rateScale);
  const t3Failures = binomialSample(nh.tier3Required, NETWORK_TOPOLOGY.tier3FailureRate * rateScale);

  const tier2BlastRadius = NETWORK_TOPOLOGY.tier1BlastRadius * NETWORK_TOPOLOGY.tier2BlastRadiusMultiplier;

  let racksDisconnected: number;
  if (t3Failures > 0) {
    // A tier-3 failure takes the whole data center offline.
    racksDisconnected = computeRacksOnline;
  } else {
    // NOTE(review): tier-1 and tier-2 blast radii are simply summed, so overlapping
    // outages may be double-counted before the cap below — confirm this is intended.
    racksDisconnected = Math.min(
      t1Failures * NETWORK_TOPOLOGY.tier1BlastRadius + t2Failures * tier2BlastRadius,
      computeRacksOnline,
    );
  }

  return {
    networkHealth: {
      ...nh,
      tier1Healthy: nh.tier1Required - t1Failures,
      tier2Healthy: nh.tier2Required - t2Failures,
      tier3Healthy: nh.tier3Required - t3Failures,
      racksDisconnected,
    },
    racksDisconnected,
  };
}
|
||||
|
||||
export function processInfrastructure(state: GameState): InfraTickResult {
|
||||
const notifications: TickNotification[] = [];
|
||||
let repairCosts = 0;
|
||||
|
||||
const engEff = state.talent.departments.engineering.effectiveness;
|
||||
const opsEff = state.talent.departments.operations.effectiveness;
|
||||
|
||||
const qaResearchBonus = state.research.completedResearch.includes('quality-assurance') ? 0.25 : 0;
|
||||
const netResearch1 = state.research.completedResearch.includes('network-engineering-i') ? 0.4 : 0;
|
||||
const netResearch2 = state.research.completedResearch.includes('network-engineering-ii') ? 0.5 : 0;
|
||||
const networkResearchBonus = Math.min(0.8, netResearch1 + netResearch2);
|
||||
|
||||
// --- Phase 1: Advance DC Construction ---
|
||||
const dataCenters: DataCenter[] = state.infrastructure.dataCenters.map(dc => {
|
||||
if (dc.status !== 'constructing') return { ...dc };
|
||||
|
||||
const newProgress = dc.constructionProgress + 1;
|
||||
if (newProgress >= dc.constructionTotal) {
|
||||
notifications.push({
|
||||
title: 'Data Center Online',
|
||||
message: `${dc.name} is now operational!`,
|
||||
type: 'success',
|
||||
});
|
||||
return { ...dc, constructionProgress: dc.constructionTotal, status: 'operational' as const };
|
||||
}
|
||||
return { ...dc, constructionProgress: newProgress };
|
||||
});
|
||||
|
||||
// --- Phase 2: Advance Rack Pipeline ---
|
||||
const rackPipeline: RackOrder[] = [];
|
||||
const newRacks: Rack[] = [];
|
||||
|
||||
for (const order of state.infrastructure.rackPipeline) {
|
||||
const speed = stageSpeed(order.stage, engEff, opsEff);
|
||||
const newProgress = order.stageProgress + speed;
|
||||
|
||||
if (newProgress < order.stageTotal) {
|
||||
rackPipeline.push({ ...order, stageProgress: newProgress });
|
||||
continue;
|
||||
}
|
||||
|
||||
if (order.stage === 'decommission') {
|
||||
const sku = RACK_SKU_CONFIGS[order.skuId];
|
||||
notifications.push({
|
||||
title: 'Rack Decommissioned',
|
||||
message: `${sku.name} rack has been fully decommissioned.`,
|
||||
type: 'info',
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if (order.stage === 'repair') {
|
||||
const total = stageTotal('testing', order);
|
||||
rackPipeline.push({
|
||||
...order,
|
||||
stage: 'testing',
|
||||
stageProgress: 0,
|
||||
stageTotal: total,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
const next = nextStage(order.stage);
|
||||
|
||||
if (next === 'production') {
|
||||
const sku = RACK_SKU_CONFIGS[order.skuId];
|
||||
const dc = dataCenters.find(d => d.id === order.dataCenterId);
|
||||
const cooling = dc?.coolingLevel ?? 0;
|
||||
|
||||
const effectiveFailRate = sku.testFailureRate
|
||||
* (1 - cooling * COOLING_FAILURE_REDUCTION)
|
||||
* (1 - opsEff * 0.2)
|
||||
* (1 - qaResearchBonus);
|
||||
|
||||
if (Math.random() < effectiveFailRate) {
|
||||
const repairCost = sku.baseCost * sku.repairCostFraction;
|
||||
repairCosts += repairCost;
|
||||
rackPipeline.push({
|
||||
...order,
|
||||
stage: 'repair',
|
||||
stageProgress: 0,
|
||||
stageTotal: RACK_REPAIR_BASE_TICKS,
|
||||
repairCount: order.repairCount + 1,
|
||||
});
|
||||
notifications.push({
|
||||
title: 'Rack Failed Testing',
|
||||
message: `${sku.name} rack failed QA (attempt ${order.repairCount + 1}). Repair cost: $${repairCost.toLocaleString()}`,
|
||||
type: 'warning',
|
||||
});
|
||||
} else {
|
||||
newRacks.push({
|
||||
id: order.id,
|
||||
skuId: order.skuId,
|
||||
dataCenterId: order.dataCenterId,
|
||||
isHealthy: true,
|
||||
});
|
||||
notifications.push({
|
||||
title: 'Rack Online',
|
||||
message: `${sku.name} rack is now in production at ${dc?.name ?? 'data center'}.`,
|
||||
type: 'success',
|
||||
});
|
||||
}
|
||||
} else {
|
||||
const total = stageTotal(next, order);
|
||||
rackPipeline.push({
|
||||
...order,
|
||||
stage: next,
|
||||
stageProgress: 0,
|
||||
stageTotal: total,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Add newly completed racks to their data centers
|
||||
for (const rack of newRacks) {
|
||||
const dcIdx = dataCenters.findIndex(d => d.id === rack.dataCenterId);
|
||||
if (dcIdx !== -1) {
|
||||
dataCenters[dcIdx] = {
|
||||
...dataCenters[dcIdx],
|
||||
racks: [...dataCenters[dcIdx].racks, rack],
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// --- Phase 3: Production Failures ---
|
||||
for (let dcIdx = 0; dcIdx < dataCenters.length; dcIdx++) {
|
||||
const dc = dataCenters[dcIdx];
|
||||
if (dc.status !== 'operational') continue;
|
||||
|
||||
const updatedRacks: Rack[] = [];
|
||||
for (const rack of dc.racks) {
|
||||
if (!rack.isHealthy) {
|
||||
updatedRacks.push(rack);
|
||||
continue;
|
||||
}
|
||||
|
||||
const sku = RACK_SKU_CONFIGS[rack.skuId];
|
||||
const effectiveRate = sku.productionFailureRate
|
||||
* (1 - dc.coolingLevel * COOLING_FAILURE_REDUCTION)
|
||||
* (1 - dc.redundancyLevel * REDUNDANCY_FAILURE_REDUCTION);
|
||||
|
||||
if (Math.random() < effectiveRate) {
|
||||
updatedRacks.push({ ...rack, isHealthy: false });
|
||||
const repairCost = sku.baseCost * sku.repairCostFraction;
|
||||
repairCosts += repairCost;
|
||||
|
||||
rackPipeline.push({
|
||||
id: rack.id,
|
||||
skuId: rack.skuId,
|
||||
dataCenterId: dc.id,
|
||||
stage: 'repair',
|
||||
stageProgress: 0,
|
||||
stageTotal: RACK_REPAIR_BASE_TICKS,
|
||||
totalCost: repairCost,
|
||||
repairCount: 0,
|
||||
});
|
||||
|
||||
notifications.push({
|
||||
title: 'Rack Failure',
|
||||
message: `${sku.name} rack failed in ${dc.name}. Sent for repair.`,
|
||||
type: 'danger',
|
||||
});
|
||||
} else {
|
||||
updatedRacks.push(rack);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove failed racks from the DC (they're now in the repair pipeline)
|
||||
dataCenters[dcIdx] = {
|
||||
...dc,
|
||||
racks: updatedRacks.filter(r => r.isHealthy),
|
||||
};
|
||||
}
|
||||
|
||||
// --- Phase 4: Compute Aggregates ---
|
||||
let totalFlops = 0;
|
||||
let totalUptime = 0;
|
||||
let totalRackCount = 0;
|
||||
let totalComputeRackCount = 0;
|
||||
let totalDataCenterCount = 0;
|
||||
let dcWithRacks = 0;
|
||||
|
||||
for (let dcIdx = 0; dcIdx < dataCenters.length; dcIdx++) {
|
||||
const dc = dataCenters[dcIdx];
|
||||
if (dc.status !== 'operational') continue;
|
||||
|
||||
const location = LOCATION_CONFIGS[dc.location];
|
||||
const tierConfig = DC_TIER_CONFIGS[dc.tier];
|
||||
|
||||
let dcFlops = 0;
|
||||
let usedPowerKW = 0;
|
||||
const repairingForDc = rackPipeline.filter(o => o.dataCenterId === dc.id && o.stage === 'repair').length;
|
||||
const healthyCount = dc.racks.length;
|
||||
const totalInDc = dc.racks.length + repairingForDc;
|
||||
|
||||
for (const rack of dc.racks) {
|
||||
const sku = RACK_SKU_CONFIGS[rack.skuId];
|
||||
dcFlops += sku.flopsPerRack;
|
||||
usedPowerKW += sku.powerDrawKW;
|
||||
const clusters: Cluster[] = state.infrastructure.clusters.map(cluster => {
|
||||
// Advance cluster construction
|
||||
if (cluster.status === 'constructing') {
|
||||
const newProgress = cluster.constructionProgress + 1;
|
||||
if (newProgress >= cluster.constructionTotal) {
|
||||
notifications.push({
|
||||
title: 'Cluster Online',
|
||||
message: `${cluster.name} cluster in ${LOCATION_CONFIGS[cluster.locationId].name} is now operational!`,
|
||||
type: 'success',
|
||||
});
|
||||
return { ...cluster, constructionProgress: cluster.constructionTotal, status: 'operational' as const, campuses: cluster.campuses };
|
||||
}
|
||||
return { ...cluster, constructionProgress: newProgress };
|
||||
}
|
||||
|
||||
const pipelineRacksForDc = rackPipeline.filter(o => o.dataCenterId === dc.id && o.stage !== 'decommission').length;
|
||||
const usedSlots = totalInDc + pipelineRacksForDc;
|
||||
const campuses: Campus[] = cluster.campuses.map(campus => {
|
||||
// Advance campus construction
|
||||
if (campus.status === 'constructing') {
|
||||
const newProgress = campus.constructionProgress + 1;
|
||||
if (newProgress >= campus.constructionTotal) {
|
||||
notifications.push({
|
||||
title: 'Campus Ready',
|
||||
message: `Campus ${campus.name} is now operational!`,
|
||||
type: 'success',
|
||||
});
|
||||
return { ...campus, constructionProgress: campus.constructionTotal, status: 'operational' as const, dataCenters: campus.dataCenters };
|
||||
}
|
||||
return { ...campus, constructionProgress: newProgress };
|
||||
}
|
||||
|
||||
const energyCostPerTick = (tierConfig.baseEnergyCostPerTick + usedPowerKW * BASE_ENERGY_COST_PER_FLOP)
|
||||
* location.energyCostMultiplier;
|
||||
const maintenanceCostPerTick = totalInDc * BASE_MAINTENANCE_PER_RACK;
|
||||
const dataCenters: DataCenter[] = campus.dataCenters.map(dc => {
|
||||
// Advance DC construction
|
||||
if (dc.status === 'constructing') {
|
||||
const newProgress = dc.constructionProgress + 1;
|
||||
if (newProgress >= dc.constructionTotal) {
|
||||
notifications.push({
|
||||
title: 'Data Center Online',
|
||||
message: `${dc.name} is now operational!`,
|
||||
type: 'success',
|
||||
});
|
||||
return { ...dc, constructionProgress: dc.constructionTotal, status: 'operational' as const };
|
||||
}
|
||||
return { ...dc, constructionProgress: newProgress };
|
||||
}
|
||||
|
||||
const currentUptime = totalInDc > 0 ? healthyCount / totalInDc : 1;
|
||||
let computeRacksOnline = dc.computeRacksOnline;
|
||||
let computeRacksFailed = dc.computeRacksFailed;
|
||||
let dcRepairCosts = 0;
|
||||
|
||||
totalFlops += dcFlops;
|
||||
totalRackCount += totalInDc;
|
||||
if (totalInDc > 0) {
|
||||
totalUptime += currentUptime;
|
||||
dcWithRacks++;
|
||||
}
|
||||
// Process retrofit
|
||||
if (dc.status === 'retrofitting' && dc.retrofitState) {
|
||||
const rs = { ...dc.retrofitState };
|
||||
rs.progress += (1 + opsEff * 0.1);
|
||||
|
||||
dataCenters[dcIdx] = {
|
||||
...dataCenters[dcIdx],
|
||||
usedSlots,
|
||||
usedPowerKW,
|
||||
energyCostPerTick,
|
||||
maintenanceCostPerTick,
|
||||
currentUptime,
|
||||
};
|
||||
}
|
||||
if (rs.progress >= rs.total) {
|
||||
if (rs.phase === 'decommissioning') {
|
||||
const installSku = RACK_SKU_CONFIGS[rs.toSkuId];
|
||||
const installTotal = cohortStageTotal('installation', rs.toSkuId, rs.racksRemaining);
|
||||
return {
|
||||
...dc,
|
||||
computeRacksOnline: 0,
|
||||
computeRacksFailed: 0,
|
||||
rackSkuId: rs.toSkuId,
|
||||
deploymentCohorts: [{
|
||||
id: `retrofit-${dc.id}-${Date.now()}`,
|
||||
count: rs.racksRemaining,
|
||||
skuId: rs.toSkuId,
|
||||
stage: 'installation' as PipelineStage,
|
||||
stageProgress: 0,
|
||||
stageTotal: installTotal,
|
||||
repairCount: 0,
|
||||
}],
|
||||
retrofitState: {
|
||||
...rs,
|
||||
phase: 'installing' as const,
|
||||
progress: 0,
|
||||
total: installTotal,
|
||||
},
|
||||
networkHealth: computeNetworkHealth(0),
|
||||
effectiveComputeRacks: 0,
|
||||
usedSlots: 0,
|
||||
usedPowerKW: 0,
|
||||
currentUptime: 0,
|
||||
energyCostPerTick: DC_TIER_CONFIGS[dc.tier].baseEnergyCostPerTick * LOCATION_CONFIGS[cluster.locationId].energyCostMultiplier,
|
||||
maintenanceCostPerTick: 0,
|
||||
};
|
||||
} else {
|
||||
notifications.push({
|
||||
title: 'Retrofit Complete',
|
||||
message: `${dc.name} retrofit to ${RACK_SKU_CONFIGS[rs.toSkuId].name} is complete!`,
|
||||
type: 'success',
|
||||
});
|
||||
return {
|
||||
...dc,
|
||||
status: 'operational' as const,
|
||||
retrofitState: null,
|
||||
};
|
||||
}
|
||||
}
|
||||
return { ...dc, retrofitState: rs };
|
||||
}
|
||||
|
||||
// Process deployment cohorts
|
||||
const updatedCohorts: DeploymentCohort[] = [];
|
||||
let racksJustOnlined = 0;
|
||||
let racksFailedTesting = 0;
|
||||
|
||||
for (const cohort of dc.deploymentCohorts) {
|
||||
const speed = stageSpeed(cohort.stage, engEff, opsEff);
|
||||
const newProgress = cohort.stageProgress + speed;
|
||||
|
||||
if (newProgress < cohort.stageTotal) {
|
||||
updatedCohorts.push({ ...cohort, stageProgress: newProgress });
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cohort.stage === 'decommission') {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cohort.stage === 'repair') {
|
||||
const testTotal = cohortStageTotal('testing', cohort.skuId, cohort.count);
|
||||
updatedCohorts.push({
|
||||
...cohort,
|
||||
stage: 'testing',
|
||||
stageProgress: 0,
|
||||
stageTotal: testTotal,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
const next = nextStage(cohort.stage);
|
||||
|
||||
if (next === 'production') {
|
||||
const sku = RACK_SKU_CONFIGS[cohort.skuId];
|
||||
const effectiveFailRate = sku.testFailureRate
|
||||
* (1 - dc.coolingLevel * COOLING_FAILURE_REDUCTION)
|
||||
* (1 - opsEff * 0.2)
|
||||
* (1 - qaResearchBonus);
|
||||
|
||||
const failed = binomialSample(cohort.count, effectiveFailRate);
|
||||
const passed = cohort.count - failed;
|
||||
|
||||
racksJustOnlined += passed;
|
||||
|
||||
if (failed > 0) {
|
||||
racksFailedTesting += failed;
|
||||
const repairCost = sku.baseCost * sku.repairCostFraction * failed;
|
||||
dcRepairCosts += repairCost;
|
||||
|
||||
updatedCohorts.push({
|
||||
id: `repair-${cohort.id}`,
|
||||
count: failed,
|
||||
skuId: cohort.skuId,
|
||||
stage: 'repair',
|
||||
stageProgress: 0,
|
||||
stageTotal: cohortStageTotal('repair', cohort.skuId, failed),
|
||||
repairCount: cohort.repairCount + 1,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
const total = cohortStageTotal(next, cohort.skuId, cohort.count);
|
||||
updatedCohorts.push({
|
||||
...cohort,
|
||||
stage: next,
|
||||
stageProgress: 0,
|
||||
stageTotal: total,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
computeRacksOnline += racksJustOnlined;
|
||||
|
||||
if (racksFailedTesting > 0) {
|
||||
const skuName = dc.rackSkuId ? RACK_SKU_CONFIGS[dc.rackSkuId].name : 'Unknown';
|
||||
notifications.push({
|
||||
title: 'Racks Failed Testing',
|
||||
message: `${dc.name}: ${racksFailedTesting} ${skuName} rack${racksFailedTesting > 1 ? 's' : ''} failed QA — repair batch created.`,
|
||||
type: 'warning',
|
||||
});
|
||||
}
|
||||
|
||||
if (racksJustOnlined > 0 && updatedCohorts.filter(c => c.stage !== 'repair').length === 0) {
|
||||
notifications.push({
|
||||
title: 'Deployment Complete',
|
||||
message: `${dc.name}: all racks deployed and online!`,
|
||||
type: 'success',
|
||||
});
|
||||
}
|
||||
|
||||
// Production failures (statistical)
|
||||
if (computeRacksOnline > 0 && dc.rackSkuId) {
|
||||
const sku = RACK_SKU_CONFIGS[dc.rackSkuId];
|
||||
const effectiveRate = sku.productionFailureRate
|
||||
* (1 - dc.coolingLevel * COOLING_FAILURE_REDUCTION)
|
||||
* (1 - dc.redundancyLevel * REDUNDANCY_FAILURE_REDUCTION);
|
||||
|
||||
const prodFailures = binomialSample(computeRacksOnline, effectiveRate);
|
||||
if (prodFailures > 0) {
|
||||
computeRacksOnline -= prodFailures;
|
||||
computeRacksFailed += prodFailures;
|
||||
const repairCost = sku.baseCost * sku.repairCostFraction * prodFailures;
|
||||
dcRepairCosts += repairCost;
|
||||
|
||||
updatedCohorts.push({
|
||||
id: `prodfail-${dc.id}-${Date.now()}`,
|
||||
count: prodFailures,
|
||||
skuId: dc.rackSkuId,
|
||||
stage: 'repair',
|
||||
stageProgress: 0,
|
||||
stageTotal: cohortStageTotal('repair', dc.rackSkuId, prodFailures),
|
||||
repairCount: 0,
|
||||
});
|
||||
|
||||
notifications.push({
|
||||
title: 'Production Failure',
|
||||
message: `${dc.name}: ${prodFailures} rack${prodFailures > 1 ? 's' : ''} failed in production — sent for repair.`,
|
||||
type: 'danger',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
repairCosts += dcRepairCosts;
|
||||
|
||||
// Network health
|
||||
const baseNetworkHealth = computeNetworkHealth(computeRacksOnline);
|
||||
const { networkHealth, racksDisconnected } = processNetworkFailures(
|
||||
baseNetworkHealth, computeRacksOnline, networkResearchBonus,
|
||||
);
|
||||
|
||||
if (racksDisconnected > 0) {
|
||||
if (networkHealth.tier3Healthy < networkHealth.tier3Required) {
|
||||
notifications.push({
|
||||
title: 'Core Network Failure',
|
||||
message: `${dc.name}: Tier-3 core switch failure — entire DC disconnected!`,
|
||||
type: 'danger',
|
||||
});
|
||||
} else if (racksDisconnected >= NETWORK_TOPOLOGY.tier1BlastRadius * NETWORK_TOPOLOGY.tier2BlastRadiusMultiplier) {
|
||||
notifications.push({
|
||||
title: 'Network Switch Failure',
|
||||
message: `${dc.name}: Tier-2 aggregation failure — ${racksDisconnected} racks disconnected.`,
|
||||
type: 'warning',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const effectiveComputeRacks = computeRacksOnline - racksDisconnected;
|
||||
|
||||
// Compute aggregates for this DC
|
||||
const location = LOCATION_CONFIGS[cluster.locationId];
|
||||
const tierConfig = DC_TIER_CONFIGS[dc.tier];
|
||||
const totalRacksInDc = computeRacksOnline + computeRacksFailed;
|
||||
const netSlots = networkSlotsRequired(computeRacksOnline);
|
||||
const pipelineRacks = updatedCohorts
|
||||
.filter(c => c.stage !== 'decommission' && c.stage !== 'repair')
|
||||
.reduce((sum, c) => sum + c.count, 0);
|
||||
const usedSlots = totalRacksInDc + netSlots + pipelineRacks;
|
||||
|
||||
let usedPowerKW = 0;
|
||||
let dcFlops = 0;
|
||||
if (dc.rackSkuId && computeRacksOnline > 0) {
|
||||
const sku = RACK_SKU_CONFIGS[dc.rackSkuId];
|
||||
usedPowerKW = computeRacksOnline * sku.powerDrawKW;
|
||||
dcFlops = effectiveComputeRacks * sku.flopsPerRack;
|
||||
}
|
||||
|
||||
const energyCostPerTick = (tierConfig.baseEnergyCostPerTick + usedPowerKW * BASE_ENERGY_COST_PER_FLOP)
|
||||
* location.energyCostMultiplier;
|
||||
const maintenanceCostPerTick = totalRacksInDc * BASE_MAINTENANCE_PER_RACK;
|
||||
|
||||
const currentUptime = totalRacksInDc > 0 ? effectiveComputeRacks / totalRacksInDc : 1;
|
||||
|
||||
totalFlops += dcFlops;
|
||||
totalRackCount += totalRacksInDc + netSlots;
|
||||
totalComputeRackCount += totalRacksInDc;
|
||||
totalDataCenterCount++;
|
||||
if (totalRacksInDc > 0) {
|
||||
totalUptime += currentUptime;
|
||||
dcWithRacks++;
|
||||
}
|
||||
|
||||
return {
|
||||
...dc,
|
||||
computeRacksOnline,
|
||||
computeRacksFailed,
|
||||
deploymentCohorts: updatedCohorts,
|
||||
networkHealth,
|
||||
effectiveComputeRacks,
|
||||
usedSlots,
|
||||
usedPowerKW,
|
||||
energyCostPerTick,
|
||||
maintenanceCostPerTick,
|
||||
currentUptime,
|
||||
};
|
||||
});
|
||||
|
||||
return { ...campus, dataCenters };
|
||||
});
|
||||
|
||||
return { ...cluster, campuses };
|
||||
});
|
||||
|
||||
return {
|
||||
infrastructure: {
|
||||
dataCenters,
|
||||
rackPipeline,
|
||||
clusters,
|
||||
totalFlops,
|
||||
totalUptime: dcWithRacks > 0 ? totalUptime / dcWithRacks : 1,
|
||||
totalRackCount,
|
||||
totalComputeRackCount,
|
||||
totalDataCenterCount,
|
||||
},
|
||||
notifications,
|
||||
repairCosts,
|
||||
|
||||
Reference in New Issue
Block a user