Cache serving pipeline fleet to eliminate per-tick rebuilds and reduce GC pressure

The fleet template is now rebuilt only when `deploymentVersion` changes (~68 times
per 28,800-tick run instead of on every tick). The code now reuses module-level
Maps, arrays, and utilization objects instead of allocating new ones each tick.
It also replaces four `Object.values().reduce()` calls with a single-pass
aggregation and sorts the fleet in place.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 19:51:13 -04:00
parent bbb69a315c
commit 57a81be769
7 changed files with 190 additions and 99 deletions
+2
View File
@@ -211,6 +211,7 @@ export interface ModelsState {
bestDeployedModelScore: number;
bestDeployedSafetyScore: number;
bestDeployedCapabilities: ModelCapabilities;
deploymentVersion: number;
}
export const DEFAULT_DATA_MIX: DataMixAllocation = {
@@ -266,4 +267,5 @@ export const INITIAL_MODELS: ModelsState = {
bestDeployedModelScore: 0,
bestDeployedSafetyScore: 0,
bestDeployedCapabilities: { reasoning: 0, coding: 0, creative: 0, math: 0, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
deploymentVersion: 0,
};