diff --git a/apps/web/src/store/index.ts b/apps/web/src/store/index.ts index 8087c70..f26fe0c 100644 --- a/apps/web/src/store/index.ts +++ b/apps/web/src/store/index.ts @@ -1085,6 +1085,7 @@ export const useGameStore = create()( productLines: s.models.productLines.map(pl => ({ ...pl, modelId, isActive: true, })), + deploymentVersion: s.models.deploymentVersion + 1, }, market: { ...s.market, @@ -1104,6 +1105,7 @@ export const useGameStore = create()( ? { ...f, variants: f.variants.map(v => v.id === variantId ? { ...v, isDeployed: true } : v) } : f, ), + deploymentVersion: s.models.deploymentVersion + 1, }, })); get().addNotification({ title: 'Variant Deployed', message: 'Variant is now live.', type: 'success', tick: get().meta.tickCount }); diff --git a/packages/game-engine/src/index.ts b/packages/game-engine/src/index.ts index a32d0ea..e7b88f2 100644 --- a/packages/game-engine/src/index.ts +++ b/packages/game-engine/src/index.ts @@ -3,6 +3,7 @@ export { processTick, setAchievementDefinitions } from './tick'; export type { TickNotification } from './tick'; export { getAvailableResearch, getResearchNode } from './systems/researchSystem'; export { getResearchBonuses, resetResearchBonusCache } from './systems/researchBonuses'; +export { resetFleetCache } from './systems/market/servingPipeline'; export type { ResearchBonuses } from './systems/researchBonuses'; export { emptyDCNetworkSummary, emptyCampusNetworkSummary, emptyClusterNetworkSummary } from './systems/infrastructureSystem'; export { onModelDeployed } from './systems/market/obsolescenceSystem'; diff --git a/packages/game-engine/src/systems/market/servingPipeline.ts b/packages/game-engine/src/systems/market/servingPipeline.ts index 043b9c5..0079285 100644 --- a/packages/game-engine/src/systems/market/servingPipeline.ts +++ b/packages/game-engine/src/systems/market/servingPipeline.ts @@ -6,7 +6,7 @@ import type { ModelUtilizationEntry, BatchApiState, } from '@ai-tycoon/shared'; -import type { BaseModel, ModelVariant, ModelFamily, ModelsState, SizeTier } from '@ai-tycoon/shared'; +import type { BaseModel, ModelsState, SizeTier } from '@ai-tycoon/shared'; import { MODEL_SIZE_THROUGHPUT_SCALER, MOE_SPEED_MULTIPLIER, @@ -62,73 +62,133 @@ export interface ServingPipelineResult { batchRevenue: number; } +interface CachedSlot { + modelId: string; + modelName: string; + sizeTier: SizeTier; + isVariant: boolean; + quantization: string | null; + qualityScore: number; + speedMultiplier: number; + throughputMultiplier: number; + isMoE: boolean; +} + +let cachedDeploymentVersion = -1; +let cachedSlots: CachedSlot[] = []; +const fleetOutput: ModelServingSlot[] = []; + +const mainRemaining = new Map(); +const mainUsed = new Map(); +const entRemaining = new Map(); +const entUsed = new Map(); + +let cachedUtilization: ModelUtilizationEntry[] = []; + +export function resetFleetCache(): void { + cachedDeploymentVersion = -1; + cachedSlots.length = 0; + fleetOutput.length = 0; + mainRemaining.clear(); + mainUsed.clear(); + entRemaining.clear(); + entUsed.clear(); + cachedUtilization.length = 0; +} + function buildModelFleet( modelsState: ModelsState, effectiveInferenceFlops: number, ): ModelServingSlot[] { - const slots: ModelServingSlot[] = []; + const version = modelsState.deploymentVersion; - const deployedBases: BaseModel[] = []; - const baseModelById = new Map(); - for (const m of modelsState.baseModels) { - if (m.isDeployed) deployedBases.push(m); - baseModelById.set(m.id, m); - } + if (version !== cachedDeploymentVersion) { + cachedSlots.length = 0; - const deployedVariants: { variant: ModelVariant; baseModel: BaseModel }[] = []; - for (const family of modelsState.families) { - for (const variant of family.variants) { - if (!variant.isDeployed) continue; - const base = baseModelById.get(variant.baseModelId); - if (base) deployedVariants.push({ variant, baseModel: base }); + const baseModelById = new Map(); + for (const m of modelsState.baseModels) { + baseModelById.set(m.id, m); + if (!m.isDeployed) continue; + const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[m.sizeTier] ?? 1.0; + const moeFactor = m.architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER : 1.0; + cachedSlots.push({ + modelId: m.id, + modelName: m.name, + sizeTier: m.sizeTier, + isVariant: false, + quantization: null, + qualityScore: m.rawCapability / 100, + speedMultiplier: moeFactor, + throughputMultiplier: FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor, + isMoE: m.architecture.type === 'moe', + }); } + + for (const family of modelsState.families) { + for (const variant of family.variants) { + if (!variant.isDeployed) continue; + const base = baseModelById.get(variant.baseModelId); + if (!base) continue; + const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[base.sizeTier] ?? 1.0; + const moeFactor = variant.architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER : 1.0; + const quantConfig = variant.quantization ? QUANTIZATION_CONFIGS[variant.quantization] : null; + const quantSpeedFactor = quantConfig?.speedMultiplier ?? 1.0; + const qualityRetention = quantConfig?.qualityRetention ?? 1.0; + cachedSlots.push({ + modelId: variant.id, + modelName: variant.name, + sizeTier: base.sizeTier, + isVariant: true, + quantization: variant.quantization ?? null, + qualityScore: (base.rawCapability / 100) * qualityRetention, + speedMultiplier: moeFactor * quantSpeedFactor, + throughputMultiplier: FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor * quantSpeedFactor, + isMoE: variant.architecture.type === 'moe', + }); + } + } + + cachedDeploymentVersion = version; } - const totalDeployed = deployedBases.length + deployedVariants.length; - if (totalDeployed === 0 || effectiveInferenceFlops <= 0) return slots; + const totalDeployed = cachedSlots.length; + if (totalDeployed === 0 || effectiveInferenceFlops <= 0) { + fleetOutput.length = 0; + return fleetOutput; + } const flopsPerModel = effectiveInferenceFlops / totalDeployed; - for (const model of deployedBases) { - const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[model.sizeTier] ?? 1.0; - const moeFactor = model.architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER : 1.0; - const throughput = flopsPerModel * FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor; - - slots.push({ - modelId: model.id, - modelName: model.name, - sizeTier: model.sizeTier, - isVariant: false, - quantization: null, - qualityScore: model.rawCapability / 100, - speedMultiplier: moeFactor, - throughputCapacity: throughput, - isMoE: model.architecture.type === 'moe', - }); + fleetOutput.length = totalDeployed; + for (let i = 0; i < totalDeployed; i++) { + const cs = cachedSlots[i]; + const existing = fleetOutput[i]; + if (existing) { + existing.modelId = cs.modelId; + existing.modelName = cs.modelName; + existing.sizeTier = cs.sizeTier; + existing.isVariant = cs.isVariant; + existing.quantization = cs.quantization; + existing.qualityScore = cs.qualityScore; + existing.speedMultiplier = cs.speedMultiplier; + existing.throughputCapacity = flopsPerModel * cs.throughputMultiplier; + existing.isMoE = cs.isMoE; + } else { + fleetOutput[i] = { + modelId: cs.modelId, + modelName: cs.modelName, + sizeTier: cs.sizeTier, + isVariant: cs.isVariant, + quantization: cs.quantization, + qualityScore: cs.qualityScore, + speedMultiplier: cs.speedMultiplier, + throughputCapacity: flopsPerModel * cs.throughputMultiplier, + isMoE: cs.isMoE, + }; + } } - for (const { variant, baseModel } of deployedVariants) { - const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[baseModel.sizeTier] ?? 1.0; - const moeFactor = variant.architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER : 1.0; - const quantConfig = variant.quantization ? QUANTIZATION_CONFIGS[variant.quantization] : null; - const quantSpeedFactor = quantConfig?.speedMultiplier ?? 1.0; - const qualityRetention = quantConfig?.qualityRetention ?? 1.0; - const throughput = flopsPerModel * FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor * quantSpeedFactor; - - slots.push({ - modelId: variant.id, - modelName: variant.name, - sizeTier: baseModel.sizeTier, - isVariant: true, - quantization: variant.quantization ?? null, - qualityScore: (baseModel.rawCapability / 100) * qualityRetention, - speedMultiplier: moeFactor * quantSpeedFactor, - throughputCapacity: throughput, - isMoE: variant.architecture.type === 'moe', - }); - } - - return slots; + return fleetOutput; } function sortFleetByStrategy( @@ -136,24 +196,23 @@ function sortFleetByStrategy( strategy: string, overallUtilization: number, ): ModelServingSlot[] { - const sorted = [...fleet]; switch (strategy) { case 'quality-first': - sorted.sort((a, b) => b.qualityScore - a.qualityScore); + fleet.sort((a, b) => b.qualityScore - a.qualityScore); break; case 'speed-first': - sorted.sort((a, b) => b.throughputCapacity - a.throughputCapacity); + fleet.sort((a, b) => b.throughputCapacity - a.throughputCapacity); break; case 'balanced': default: if (overallUtilization > 0.8) { - sorted.sort((a, b) => b.throughputCapacity - a.throughputCapacity); + fleet.sort((a, b) => b.throughputCapacity - a.throughputCapacity); } else { - sorted.sort((a, b) => b.qualityScore - a.qualityScore); + fleet.sort((a, b) => b.qualityScore - a.qualityScore); } break; } - return sorted; + return fleet; } interface FleetState { @@ -250,7 +309,8 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe const { modelsState, effectiveInferenceFlops, overloadPolicy, demandByTier, batchApi, modelQuality, researchUnlocks } = input; const fleet = buildModelFleet(modelsState, effectiveInferenceFlops); - const totalFleetCapacity = fleet.reduce((sum, s) => sum + s.throughputCapacity, 0); + let totalFleetCapacity = 0; + for (const s of fleet) totalFleetCapacity += s.throughputCapacity; if (fleet.length === 0 || totalFleetCapacity <= 0) { const metrics = makeInitialServingMetrics(); @@ -275,7 +335,7 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe }; } - const totalDemand = Object.values(demandByTier).reduce((s, v) => s + v, 0); + const totalDemand = demandByTier.enterprise + demandByTier['api-paid'] + demandByTier['consumer-paid'] + demandByTier['api-free'] + demandByTier['consumer-free']; const overallUtilization = totalFleetCapacity > 0 ? totalDemand / totalFleetCapacity : 0; const effectiveStrategy = researchUnlocks.servingRoutingUnlocked @@ -284,10 +344,13 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe const sortedFleet = sortFleetByStrategy(fleet, effectiveStrategy, overallUtilization); - const fleetState: FleetState = { - remaining: new Map(fleet.map(s => [s.modelId, s.throughputCapacity])), - used: new Map(fleet.map(s => [s.modelId, 0])), - }; + mainRemaining.clear(); + mainUsed.clear(); + for (const s of fleet) { + mainRemaining.set(s.modelId, s.throughputCapacity); + mainUsed.set(s.modelId, 0); + } + const fleetState: FleetState = { remaining: mainRemaining, used: mainUsed }; const reservedCapacity = totalFleetCapacity * overloadPolicy.enterpriseReservation; const enterpriseDemand = demandByTier['enterprise'] ?? 0; @@ -310,10 +373,13 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe const nonEnterpriseTiers = effectivePriorityOrder.filter(t => t !== 'enterprise'); if (enterpriseDemand > 0) { - const enterpriseFleetState: FleetState = { - remaining: new Map(fleet.map(s => [s.modelId, s.throughputCapacity])), - used: new Map(fleet.map(s => [s.modelId, 0])), - }; + entRemaining.clear(); + entUsed.clear(); + for (const s of fleet) { + entRemaining.set(s.modelId, s.throughputCapacity); + entUsed.set(s.modelId, 0); + } + const enterpriseFleetState: FleetState = { remaining: entRemaining, used: entUsed }; const reserveLimit = reservedCapacity > 0 ? reservedCapacity : totalFleetCapacity; let budgetLeft = reserveLimit; @@ -334,10 +400,10 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe ); for (const slot of fleet) { - const entUsed = enterpriseFleetState.used.get(slot.modelId) ?? 0; - const mainRemaining = fleetState.remaining.get(slot.modelId) ?? 0; - fleetState.remaining.set(slot.modelId, Math.max(0, mainRemaining - entUsed + (reservedCapacity > 0 ? reservedCapacity / fleet.length : 0))); - fleetState.used.set(slot.modelId, entUsed); + const entUsedForModel = enterpriseFleetState.used.get(slot.modelId) ?? 0; + const mainRemainingForModel = fleetState.remaining.get(slot.modelId) ?? 0; + fleetState.remaining.set(slot.modelId, Math.max(0, mainRemainingForModel - entUsedForModel + (reservedCapacity > 0 ? reservedCapacity / fleet.length : 0))); + fleetState.used.set(slot.modelId, entUsedForModel); } } else { tierResults['enterprise'] = { demandTokens: 0, servedTokens: 0, queuedTokens: 0, rejectedTokens: 0, degradedTokens: 0, avgQualityDelivered: 1 }; @@ -390,34 +456,50 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe updatedBatchApi.revenue = batchRevenue; } - const totalServed = Object.values(tierResults).reduce((s, t) => s + t.servedTokens, 0); - const totalQueued = Object.values(tierResults).reduce((s, t) => s + t.queuedTokens, 0); - const totalRejected = Object.values(tierResults).reduce((s, t) => s + t.rejectedTokens, 0); - const totalDegraded = Object.values(tierResults).reduce((s, t) => s + t.degradedTokens, 0); - - let effectiveQuality = modelQuality; - if (totalServed > 0) { - let qualitySum = 0; - for (const t of Object.values(tierResults)) { - qualitySum += t.avgQualityDelivered * t.servedTokens; - } - effectiveQuality = qualitySum / totalServed; + let totalServed = 0; + let totalQueued = 0; + let totalRejected = 0; + let totalDegraded = 0; + let qualitySum = 0; + for (const tier of effectivePriorityOrder) { + const t = tierResults[tier]; + if (!t) continue; + totalServed += t.servedTokens; + totalQueued += t.queuedTokens; + totalRejected += t.rejectedTokens; + totalDegraded += t.degradedTokens; + qualitySum += t.avgQualityDelivered * t.servedTokens; } + const effectiveQuality = totalServed > 0 ? qualitySum / totalServed : modelQuality; const queuedFraction = totalDemand > 0 ? totalQueued / totalDemand : 0; const avgLatencyMs = BASE_LATENCY_MS + queuedFraction * 100 * QUEUE_LATENCY_MS_PER_PERCENT; - const modelUtilization: ModelUtilizationEntry[] = fleet.map(slot => ({ - modelId: slot.modelId, - modelName: slot.modelName, - quantization: slot.quantization, - qualityScore: slot.qualityScore, - throughputCapacity: slot.throughputCapacity, - throughputUsed: fleetState.used.get(slot.modelId) ?? 0, - utilization: slot.throughputCapacity > 0 - ? Math.min(1, (fleetState.used.get(slot.modelId) ?? 0) / slot.throughputCapacity) - : 0, - })); + cachedUtilization.length = fleet.length; + for (let i = 0; i < fleet.length; i++) { + const slot = fleet[i]; + const used = fleetState.used.get(slot.modelId) ?? 0; + const existing = cachedUtilization[i]; + if (existing) { + existing.modelId = slot.modelId; + existing.modelName = slot.modelName; + existing.quantization = slot.quantization; + existing.qualityScore = slot.qualityScore; + existing.throughputCapacity = slot.throughputCapacity; + existing.throughputUsed = used; + existing.utilization = slot.throughputCapacity > 0 ? Math.min(1, used / slot.throughputCapacity) : 0; + } else { + cachedUtilization[i] = { + modelId: slot.modelId, + modelName: slot.modelName, + quantization: slot.quantization, + qualityScore: slot.qualityScore, + throughputCapacity: slot.throughputCapacity, + throughputUsed: used, + utilization: slot.throughputCapacity > 0 ? Math.min(1, used / slot.throughputCapacity) : 0, + }; + } + } const autoScaleBoost = researchUnlocks.autoScalingBonus; if (autoScaleBoost > 0) { @@ -443,7 +525,7 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe totalDegraded, effectiveQuality, avgLatencyMs, - modelUtilization, + modelUtilization: cachedUtilization, batchApiTokensServed: batchTokensServed, batchApiRevenue: batchRevenue, }, diff --git a/packages/game-engine/src/tick.test.ts b/packages/game-engine/src/tick.test.ts index 65136c3..66ddfb8 100644 --- a/packages/game-engine/src/tick.test.ts +++ b/packages/game-engine/src/tick.test.ts @@ -3,12 +3,14 @@ import { processTick, setAchievementDefinitions } from './tick'; import { createTestState, createSeededRNG } from './__test-utils__'; import { ACHIEVEMENT_DEFINITIONS } from './data/achievements'; import { resetResearchBonusCache } from './systems/researchBonuses'; +import { resetFleetCache } from './systems/market/servingPipeline'; const rng = createSeededRNG(42); beforeEach(() => { rng.install(); resetResearchBonusCache(); + resetFleetCache(); setAchievementDefinitions(ACHIEVEMENT_DEFINITIONS); }); afterEach(() => rng.uninstall()); diff --git a/packages/game-simulation/src/actions/models.ts b/packages/game-simulation/src/actions/models.ts index 069b30a..563e97a 100644 --- a/packages/game-simulation/src/actions/models.ts +++ b/packages/game-simulation/src/actions/models.ts @@ -119,6 +119,7 @@ export function deployModel(state: GameState, modelId: string): boolean { if (!model) return false; model.isDeployed = true; + state.models.deploymentVersion = (state.models.deploymentVersion ?? 0) + 1; for (const pl of state.models.productLines) { pl.modelId = modelId; diff --git a/packages/game-simulation/src/runner.ts b/packages/game-simulation/src/runner.ts index 471b5b9..de4bde8 100644 --- a/packages/game-simulation/src/runner.ts +++ b/packages/game-simulation/src/runner.ts @@ -1,5 +1,5 @@ import type { GameState } from '@ai-tycoon/shared'; -import { processTick, setAchievementDefinitions, ACHIEVEMENT_DEFINITIONS, resetResearchBonusCache } from '@ai-tycoon/game-engine'; +import { processTick, setAchievementDefinitions, ACHIEVEMENT_DEFINITIONS, resetResearchBonusCache, resetFleetCache } from '@ai-tycoon/game-engine'; import type { TickNotification } from '@ai-tycoon/game-engine'; import type { Strategy, SimulationMetrics } from './strategies/types'; import { collectMetrics } from './analysis/metrics'; @@ -78,6 +78,7 @@ export function runSimulation(config: SimulationConfig): SimulationResult { resetIds(); resetResearchBonusCache(); + resetFleetCache(); let rng: ReturnType | null = null; if (config.seed !== undefined) { diff --git a/packages/shared/src/types/models.ts b/packages/shared/src/types/models.ts index c571249..04c28c7 100644 --- a/packages/shared/src/types/models.ts +++ b/packages/shared/src/types/models.ts @@ -211,6 +211,7 @@ export interface ModelsState { bestDeployedModelScore: number; bestDeployedSafetyScore: number; bestDeployedCapabilities: ModelCapabilities; + deploymentVersion: number; } export const DEFAULT_DATA_MIX: DataMixAllocation = { @@ -266,4 +267,5 @@ export const INITIAL_MODELS: ModelsState = { bestDeployedModelScore: 0, bestDeployedSafetyScore: 0, bestDeployedCapabilities: { reasoning: 0, coding: 0, creative: 0, math: 0, knowledge: 0, multimodal: 0, agents: 0, speed: 0, contextUtilization: 0 }, + deploymentVersion: 0, };