Cache serving pipeline fleet to eliminate per-tick rebuilds and reduce GC pressure
The fleet template is now rebuilt only when deploymentVersion changes (~68 times per 28,800-tick run) instead of on every tick. Module-level Maps, arrays, and utilization objects are reused rather than allocated anew each tick. The four Object.values().reduce() passes are replaced with a single-pass aggregation, and the fleet is sorted in place.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -211,6 +211,7 @@ export interface ModelsState {
|
||||
bestDeployedModelScore: number;
|
||||
bestDeployedSafetyScore: number;
|
||||
bestDeployedCapabilities: ModelCapabilities;
|
||||
deploymentVersion: number;
|
||||
}
|
||||
|
||||
export const DEFAULT_DATA_MIX: DataMixAllocation = {
|
||||
@@ -266,4 +267,5 @@ export const INITIAL_MODELS: ModelsState = {
|
||||
bestDeployedModelScore: 0,
|
||||
bestDeployedSafetyScore: 0,
|
||||
bestDeployedCapabilities: { reasoning: 0, coding: 0, creative: 0, math: 0, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 },
|
||||
deploymentVersion: 0,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user