Cache serving pipeline fleet to eliminate per-tick rebuilds and reduce GC pressure

The fleet template is now rebuilt only when `deploymentVersion` changes (~68 times per 28,800-tick run instead of every tick). The pipeline reuses module-level Maps, arrays, and utilization objects instead of allocating new ones each tick, replaces four `Object.values().reduce()` passes with a single-pass aggregation, and sorts the fleet in place.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-26 19:51:13 -04:00
parent bbb69a315c
commit 57a81be769
7 changed files with 190 additions and 99 deletions
@@ -1085,6 +1085,7 @@ export const useGameStore = create<Store>()(
            productLines: s.models.productLines.map(pl => ({
              ...pl, modelId, isActive: true,
            })),
            deploymentVersion: s.models.deploymentVersion + 1,
          },
          market: {
            ...s.market,
@@ -1104,6 +1105,7 @@ export const useGameStore = create<Store>()(
                ? { ...f, variants: f.variants.map(v => v.id === variantId ? { ...v, isDeployed: true } : v) }
                : f,
            ),
            deploymentVersion: s.models.deploymentVersion + 1,
          },
        }));
        get().addNotification({ title: 'Variant Deployed', message: 'Variant is now live.', type: 'success', tick: get().meta.tickCount });
@@ -3,6 +3,7 @@ export { processTick, setAchievementDefinitions } from './tick';
 export type { TickNotification } from './tick';
 export { getAvailableResearch, getResearchNode } from './systems/researchSystem';
 export { getResearchBonuses, resetResearchBonusCache } from './systems/researchBonuses';
 export { resetFleetCache } from './systems/market/servingPipeline';
 export type { ResearchBonuses } from './systems/researchBonuses';
 export { emptyDCNetworkSummary, emptyCampusNetworkSummary, emptyClusterNetworkSummary } from './systems/infrastructureSystem';
 export { onModelDeployed } from './systems/market/obsolescenceSystem';
@@ -6,7 +6,7 @@ import type {
  ModelUtilizationEntry,
  BatchApiState,
 } from '@ai-tycoon/shared';
-import type { BaseModel, ModelVariant, ModelFamily, ModelsState, SizeTier } from '@ai-tycoon/shared';
+import type { BaseModel, ModelsState, SizeTier } from '@ai-tycoon/shared';
 import {
  MODEL_SIZE_THROUGHPUT_SCALER,
  MOE_SPEED_MULTIPLIER,
@@ -62,73 +62,133 @@ export interface ServingPipelineResult {
  batchRevenue: number;
 }
 /**
  * Per-model serving data that does not depend on the per-call FLOPs budget.
  * buildModelFleet stores one entry per deployed base model or deployed variant
  * and later multiplies `throughputMultiplier` by the per-model FLOPs share to
  * obtain a slot's `throughputCapacity`.
  */
 interface CachedSlot {
  /** Id of the base model, or of the variant when `isVariant` is true. */
  modelId: string;
  /** Name of the base model or variant the slot was built from. */
  modelName: string;
  /** Size tier of the base model (variants use the tier of their base model). */
  sizeTier: SizeTier;
  /** False for base-model slots, true for variant slots. */
  isVariant: boolean;
  /** The variant's quantization, or null for base models and unquantized variants. */
  quantization: string | null;
  /** Base `rawCapability / 100`, scaled by the quantization's quality retention for variants. */
  qualityScore: number;
  /** MoE speed factor, additionally multiplied by the quantization speed factor for variants. */
  speedMultiplier: number;
  /** FLOPS_TO_TOKENS_MULTIPLIER times the size, MoE and (for variants) quantization speed factors. */
  throughputMultiplier: number;
  /** True when the model's (or variant's) architecture type is 'moe'. */
  isMoE: boolean;
 }
 // Value of modelsState.deploymentVersion that cachedSlots was last built for.
 // Initialised (and reset) to -1.
 let cachedDeploymentVersion = -1;
 // Slot template; buildModelFleet refills it when the deployment version differs from the cached one.
 let cachedSlots: CachedSlot[] = [];
 // Array returned by buildModelFleet; its elements are overwritten in place on every call.
 const fleetOutput: ModelServingSlot[] = [];
 // Per-model remaining/used maps backing the main FleetState in processServingPipeline,
 // which clears and refills them instead of allocating new Maps.
 const mainRemaining = new Map<string, number>();
 const mainUsed = new Map<string, number>();
 // Same as above, for the enterprise FleetState (only refilled when enterprise demand is > 0).
 const entRemaining = new Map<string, number>();
 const entUsed = new Map<string, number>();
 // Utilization entries reused across calls; this array is what processServingPipeline
 // places in its result as `modelUtilization`.
 let cachedUtilization: ModelUtilizationEntry[] = [];
 /**
  * Drops every module-level serving-pipeline cache so the next call rebuilds
  * the fleet from scratch. Intended to be called between independent runs
  * (tests, simulations).
  */
 export function resetFleetCache(): void {
  // Same sentinel the cache is initialised with; buildModelFleet rebuilds
  // whenever the state's deploymentVersion differs from this value.
  cachedDeploymentVersion = -1;
  cachedSlots.length = 0;
  fleetOutput.length = 0;
  mainRemaining.clear();
  mainUsed.clear();
  entRemaining.clear();
  entUsed.clear();
  // This array is returned from processServingPipeline as `modelUtilization`,
  // so truncating it in place would empty results a caller may still hold.
  // Rebind to a fresh array and leave the previously returned one untouched.
  cachedUtilization = [];
 }
 /**
  * Produces one ModelServingSlot per deployed base model and deployed variant.
  *
  * In the added implementation the FLOPs-independent part of each slot is kept in
  * `cachedSlots` and is rebuilt only when `modelsState.deploymentVersion` differs
  * from `cachedDeploymentVersion`. On every call `effectiveInferenceFlops` is split
  * evenly across the cached slots and the results are written into the module-level
  * `fleetOutput` array, which is returned (the same array instance each call).
  *
  * NOTE(review): this span is a rendered diff. Lines prefixed `-` are removed, and
  * several unmarked lines (the `deployedBases` / `deployedVariants` / `slots.push`
  * remnants) appear to belong to the removed implementation as well; confirm
  * against the actual file before relying on line-level details.
  * NOTE(review): the cache is keyed on `deploymentVersion` only, so presumably every
  * code path that changes which models are deployed must bump it; verify callers.
  */
 function buildModelFleet(
  modelsState: ModelsState,
  effectiveInferenceFlops: number,
 ): ModelServingSlot[] {
-  const slots: ModelServingSlot[] = [];
+  const version = modelsState.deploymentVersion;
  if (version !== cachedDeploymentVersion) {
    cachedSlots.length = 0;
  const deployedBases: BaseModel[] = [];
    const baseModelById = new Map<string, BaseModel>();
    for (const m of modelsState.baseModels) {
    if (m.isDeployed) deployedBases.push(m);
      baseModelById.set(m.id, m);
      if (!m.isDeployed) continue;
      const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[m.sizeTier] ?? 1.0;
      const moeFactor = m.architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER : 1.0;
      cachedSlots.push({
        modelId: m.id,
        modelName: m.name,
        sizeTier: m.sizeTier,
        isVariant: false,
        quantization: null,
        qualityScore: m.rawCapability / 100,
        speedMultiplier: moeFactor,
        throughputMultiplier: FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor,
        isMoE: m.architecture.type === 'moe',
      });
    }
  const deployedVariants: { variant: ModelVariant; baseModel: BaseModel }[] = [];
    for (const family of modelsState.families) {
      for (const variant of family.variants) {
        if (!variant.isDeployed) continue;
        const base = baseModelById.get(variant.baseModelId);
-      if (base) deployedVariants.push({ variant, baseModel: base });
+        if (!base) continue;
-    }
+        const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[base.sizeTier] ?? 1.0;
  }
  const totalDeployed = deployedBases.length + deployedVariants.length;
  if (totalDeployed === 0 || effectiveInferenceFlops <= 0) return slots;
  const flopsPerModel = effectiveInferenceFlops / totalDeployed;
  for (const model of deployedBases) {
    const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[model.sizeTier] ?? 1.0;
    const moeFactor = model.architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER : 1.0;
    const throughput = flopsPerModel * FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor;
    slots.push({
      modelId: model.id,
      modelName: model.name,
      sizeTier: model.sizeTier,
      isVariant: false,
      quantization: null,
      qualityScore: model.rawCapability / 100,
      speedMultiplier: moeFactor,
      throughputCapacity: throughput,
      isMoE: model.architecture.type === 'moe',
    });
  }
  for (const { variant, baseModel } of deployedVariants) {
    const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[baseModel.sizeTier] ?? 1.0;
        const moeFactor = variant.architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER : 1.0;
        const quantConfig = variant.quantization ? QUANTIZATION_CONFIGS[variant.quantization] : null;
        const quantSpeedFactor = quantConfig?.speedMultiplier ?? 1.0;
        const qualityRetention = quantConfig?.qualityRetention ?? 1.0;
-    const throughput = flopsPerModel * FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor * quantSpeedFactor;
+        cachedSlots.push({
    slots.push({
          modelId: variant.id,
          modelName: variant.name,
-      sizeTier: baseModel.sizeTier,
+          sizeTier: base.sizeTier,
          isVariant: true,
          quantization: variant.quantization ?? null,
-      qualityScore: (baseModel.rawCapability / 100) * qualityRetention,
+          qualityScore: (base.rawCapability / 100) * qualityRetention,
          speedMultiplier: moeFactor * quantSpeedFactor,
-      throughputCapacity: throughput,
+          throughputMultiplier: FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor * quantSpeedFactor,
          isMoE: variant.architecture.type === 'moe',
        });
      }
    }
-  return slots;
+    cachedDeploymentVersion = version;
  }
  // From here on only the per-call FLOPs budget matters; the template is up to date.
  const totalDeployed = cachedSlots.length;
  if (totalDeployed === 0 || effectiveInferenceFlops <= 0) {
    // Nothing to serve with: hand back the shared output array, emptied.
    fleetOutput.length = 0;
    return fleetOutput;
  }
  // Every deployed model or variant receives an equal share of the FLOPs.
  const flopsPerModel = effectiveInferenceFlops / totalDeployed;
  // Resize the shared output array; indices past the old length read as undefined
  // and take the allocation branch below.
  fleetOutput.length = totalDeployed;
  for (let i = 0; i < totalDeployed; i++) {
    const cs = cachedSlots[i];
    const existing = fleetOutput[i];
    if (existing) {
      // Reuse the slot object from an earlier call and overwrite every field.
      existing.modelId = cs.modelId;
      existing.modelName = cs.modelName;
      existing.sizeTier = cs.sizeTier;
      existing.isVariant = cs.isVariant;
      existing.quantization = cs.quantization;
      existing.qualityScore = cs.qualityScore;
      existing.speedMultiplier = cs.speedMultiplier;
      existing.throughputCapacity = flopsPerModel * cs.throughputMultiplier;
      existing.isMoE = cs.isMoE;
    } else {
      fleetOutput[i] = {
        modelId: cs.modelId,
        modelName: cs.modelName,
        sizeTier: cs.sizeTier,
        isVariant: cs.isVariant,
        quantization: cs.quantization,
        qualityScore: cs.qualityScore,
        speedMultiplier: cs.speedMultiplier,
        throughputCapacity: flopsPerModel * cs.throughputMultiplier,
        isMoE: cs.isMoE,
      };
    }
  }
  return fleetOutput;
 }
 function sortFleetByStrategy(
@@ -136,24 +196,23 @@ function sortFleetByStrategy(
  strategy: string,
  overallUtilization: number,
 ): ModelServingSlot[] {
  const sorted = [...fleet];
  switch (strategy) {
    case 'quality-first':
-      sorted.sort((a, b) => b.qualityScore - a.qualityScore);
+      fleet.sort((a, b) => b.qualityScore - a.qualityScore);
      break;
    case 'speed-first':
-      sorted.sort((a, b) => b.throughputCapacity - a.throughputCapacity);
+      fleet.sort((a, b) => b.throughputCapacity - a.throughputCapacity);
      break;
    case 'balanced':
    default:
      if (overallUtilization > 0.8) {
-        sorted.sort((a, b) => b.throughputCapacity - a.throughputCapacity);
+        fleet.sort((a, b) => b.throughputCapacity - a.throughputCapacity);
      } else {
-        sorted.sort((a, b) => b.qualityScore - a.qualityScore);
+        fleet.sort((a, b) => b.qualityScore - a.qualityScore);
      }
      break;
  }
-  return sorted;
+  return fleet;
 }
 interface FleetState {
@@ -250,7 +309,8 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
  const { modelsState, effectiveInferenceFlops, overloadPolicy, demandByTier, batchApi, modelQuality, researchUnlocks } = input;
  const fleet = buildModelFleet(modelsState, effectiveInferenceFlops);
-  const totalFleetCapacity = fleet.reduce((sum, s) => sum + s.throughputCapacity, 0);
+  let totalFleetCapacity = 0;
  for (const s of fleet) totalFleetCapacity += s.throughputCapacity;
  if (fleet.length === 0 || totalFleetCapacity <= 0) {
    const metrics = makeInitialServingMetrics();
@@ -275,7 +335,7 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
    };
  }
-  const totalDemand = Object.values(demandByTier).reduce((s, v) => s + v, 0);
+  const totalDemand = demandByTier.enterprise + demandByTier['api-paid'] + demandByTier['consumer-paid'] + demandByTier['api-free'] + demandByTier['consumer-free'];
  const overallUtilization = totalFleetCapacity > 0 ? totalDemand / totalFleetCapacity : 0;
  const effectiveStrategy = researchUnlocks.servingRoutingUnlocked
@@ -284,10 +344,13 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
  const sortedFleet = sortFleetByStrategy(fleet, effectiveStrategy, overallUtilization);
-  const fleetState: FleetState = {
+  mainRemaining.clear();
-    remaining: new Map(fleet.map(s => [s.modelId, s.throughputCapacity])),
+  mainUsed.clear();
-    used: new Map(fleet.map(s => [s.modelId, 0])),
+  for (const s of fleet) {
-  };
+    mainRemaining.set(s.modelId, s.throughputCapacity);
    mainUsed.set(s.modelId, 0);
  }
  const fleetState: FleetState = { remaining: mainRemaining, used: mainUsed };
  const reservedCapacity = totalFleetCapacity * overloadPolicy.enterpriseReservation;
  const enterpriseDemand = demandByTier['enterprise'] ?? 0;
@@ -310,10 +373,13 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
  const nonEnterpriseTiers = effectivePriorityOrder.filter(t => t !== 'enterprise');
  if (enterpriseDemand > 0) {
-    const enterpriseFleetState: FleetState = {
+    entRemaining.clear();
-      remaining: new Map(fleet.map(s => [s.modelId, s.throughputCapacity])),
+    entUsed.clear();
-      used: new Map(fleet.map(s => [s.modelId, 0])),
+    for (const s of fleet) {
-    };
+      entRemaining.set(s.modelId, s.throughputCapacity);
      entUsed.set(s.modelId, 0);
    }
    const enterpriseFleetState: FleetState = { remaining: entRemaining, used: entUsed };
    const reserveLimit = reservedCapacity > 0 ? reservedCapacity : totalFleetCapacity;
    let budgetLeft = reserveLimit;
@@ -334,10 +400,10 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
    );
    for (const slot of fleet) {
-      const entUsed = enterpriseFleetState.used.get(slot.modelId) ?? 0;
+      const entUsedForModel = enterpriseFleetState.used.get(slot.modelId) ?? 0;
-      const mainRemaining = fleetState.remaining.get(slot.modelId) ?? 0;
+      const mainRemainingForModel = fleetState.remaining.get(slot.modelId) ?? 0;
-      fleetState.remaining.set(slot.modelId, Math.max(0, mainRemaining - entUsed + (reservedCapacity > 0 ? reservedCapacity / fleet.length : 0)));
+      fleetState.remaining.set(slot.modelId, Math.max(0, mainRemainingForModel - entUsedForModel + (reservedCapacity > 0 ? reservedCapacity / fleet.length : 0)));
-      fleetState.used.set(slot.modelId, entUsed);
+      fleetState.used.set(slot.modelId, entUsedForModel);
    }
  } else {
    tierResults['enterprise'] = { demandTokens: 0, servedTokens: 0, queuedTokens: 0, rejectedTokens: 0, degradedTokens: 0, avgQualityDelivered: 1 };
@@ -390,34 +456,50 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
    updatedBatchApi.revenue = batchRevenue;
  }
-  const totalServed = Object.values(tierResults).reduce((s, t) => s + t.servedTokens, 0);
+  let totalServed = 0;
-  const totalQueued = Object.values(tierResults).reduce((s, t) => s + t.queuedTokens, 0);
+  let totalQueued = 0;
-  const totalRejected = Object.values(tierResults).reduce((s, t) => s + t.rejectedTokens, 0);
+  let totalRejected = 0;
-  const totalDegraded = Object.values(tierResults).reduce((s, t) => s + t.degradedTokens, 0);
+  let totalDegraded = 0;
  let effectiveQuality = modelQuality;
  if (totalServed > 0) {
  let qualitySum = 0;
-    for (const t of Object.values(tierResults)) {
+  for (const tier of effectivePriorityOrder) {
    const t = tierResults[tier];
    if (!t) continue;
    totalServed += t.servedTokens;
    totalQueued += t.queuedTokens;
    totalRejected += t.rejectedTokens;
    totalDegraded += t.degradedTokens;
    qualitySum += t.avgQualityDelivered * t.servedTokens;
  }
-    effectiveQuality = qualitySum / totalServed;
+  const effectiveQuality = totalServed > 0 ? qualitySum / totalServed : modelQuality;
  }
  const queuedFraction = totalDemand > 0 ? totalQueued / totalDemand : 0;
  const avgLatencyMs = BASE_LATENCY_MS + queuedFraction * 100 * QUEUE_LATENCY_MS_PER_PERCENT;
-  const modelUtilization: ModelUtilizationEntry[] = fleet.map(slot => ({
+  cachedUtilization.length = fleet.length;
  for (let i = 0; i < fleet.length; i++) {
    const slot = fleet[i];
    const used = fleetState.used.get(slot.modelId) ?? 0;
    const existing = cachedUtilization[i];
    if (existing) {
      existing.modelId = slot.modelId;
      existing.modelName = slot.modelName;
      existing.quantization = slot.quantization;
      existing.qualityScore = slot.qualityScore;
      existing.throughputCapacity = slot.throughputCapacity;
      existing.throughputUsed = used;
      existing.utilization = slot.throughputCapacity > 0 ? Math.min(1, used / slot.throughputCapacity) : 0;
    } else {
      cachedUtilization[i] = {
        modelId: slot.modelId,
        modelName: slot.modelName,
        quantization: slot.quantization,
        qualityScore: slot.qualityScore,
        throughputCapacity: slot.throughputCapacity,
-    throughputUsed: fleetState.used.get(slot.modelId) ?? 0,
+        throughputUsed: used,
-    utilization: slot.throughputCapacity > 0
+        utilization: slot.throughputCapacity > 0 ? Math.min(1, used / slot.throughputCapacity) : 0,
-      ? Math.min(1, (fleetState.used.get(slot.modelId) ?? 0) / slot.throughputCapacity)
+      };
-      : 0,
+    }
-  }));
+  }
  const autoScaleBoost = researchUnlocks.autoScalingBonus;
  if (autoScaleBoost > 0) {
@@ -443,7 +525,7 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
      totalDegraded,
      effectiveQuality,
      avgLatencyMs,
-      modelUtilization,
+      modelUtilization: cachedUtilization,
      batchApiTokensServed: batchTokensServed,
      batchApiRevenue: batchRevenue,
    },
@@ -3,12 +3,14 @@ import { processTick, setAchievementDefinitions } from './tick';
 import { createTestState, createSeededRNG } from './__test-utils__';
 import { ACHIEVEMENT_DEFINITIONS } from './data/achievements';
 import { resetResearchBonusCache } from './systems/researchBonuses';
 import { resetFleetCache } from './systems/market/servingPipeline';
 const rng = createSeededRNG(42);
 beforeEach(() => {
  rng.install();
  resetResearchBonusCache();
  resetFleetCache();
  setAchievementDefinitions(ACHIEVEMENT_DEFINITIONS);
 });
 afterEach(() => rng.uninstall());
@@ -119,6 +119,7 @@ export function deployModel(state: GameState, modelId: string): boolean {
  if (!model) return false;
  model.isDeployed = true;
  state.models.deploymentVersion = (state.models.deploymentVersion ?? 0) + 1;
  for (const pl of state.models.productLines) {
    pl.modelId = modelId;
@@ -1,5 +1,5 @@
 import type { GameState } from '@ai-tycoon/shared';
-import { processTick, setAchievementDefinitions, ACHIEVEMENT_DEFINITIONS, resetResearchBonusCache } from '@ai-tycoon/game-engine';
+import { processTick, setAchievementDefinitions, ACHIEVEMENT_DEFINITIONS, resetResearchBonusCache, resetFleetCache } from '@ai-tycoon/game-engine';
 import type { TickNotification } from '@ai-tycoon/game-engine';
 import type { Strategy, SimulationMetrics } from './strategies/types';
 import { collectMetrics } from './analysis/metrics';
@@ -78,6 +78,7 @@ export function runSimulation(config: SimulationConfig): SimulationResult {
  resetIds();
  resetResearchBonusCache();
  resetFleetCache();
  let rng: ReturnType<typeof createSeededRNG> | null = null;
  if (config.seed !== undefined) {
@@ -211,6 +211,7 @@ export interface ModelsState {
  bestDeployedModelScore: number;
  bestDeployedSafetyScore: number;
  bestDeployedCapabilities: ModelCapabilities;
  deploymentVersion: number;
 }
 export const DEFAULT_DATA_MIX: DataMixAllocation = {
@@ -266,4 +267,5 @@ export const INITIAL_MODELS: ModelsState = {
  bestDeployedModelScore: 0,
  bestDeployedSafetyScore: 0,
  bestDeployedCapabilities: { reasoning: 0, coding: 0, creative: 0, math: 0, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
  deploymentVersion: 0,
 };