Replace decorative overload policy with real serving pipeline and dedicated Serving page
CI / build-and-push (push) Successful in 28s

The old overload policy had dead controls (maxQueueDepth and rateLimitPerCustomer were never
read) and applied only trivial flat penalties. This commit replaces it with a full serving
pipeline in which deployed models form a fleet, requests are routed through priority and
degradation logic, and policy choices create meaningful strategic tradeoffs.

New serving pipeline: fleet building from deployed models (size/quant/MoE multipliers),
demand categorization by 5 priority tiers, enterprise capacity reservation, priority-ordered
serving with overflow behaviors (queue/reject/degrade), auto-degradation to faster models
under load, and Batch API to fill idle capacity at discounted rates.

Four new research nodes gate these features progressively: Intelligent Request Routing,
Priority Queue System, Request Batching, and Auto-Scaling. A new dedicated Serving page shows
pipeline metrics, model fleet utilization, and research-gated policy controls.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 12:42:09 -04:00
parent d7d77238b9
commit 901db02a6b
17 changed files with 1349 additions and 229 deletions
@@ -1,9 +1,10 @@
import type { ApiTierState, ApiTierId, DeveloperEcosystem } from '@ai-tycoon/shared';
import type { ApiTierState, ApiTierId, DeveloperEcosystem, TierServingMetrics } from '@ai-tycoon/shared';
import {
API_TIER_ORDER,
API_CONVERSION_RATES,
API_TIER_CHURN_RATES,
API_TOKENS_PER_DEVELOPER_PER_TICK,
REJECTION_CHURN_MULTIPLIER,
} from '@ai-tycoon/shared';
export interface ApiTickResult {
@@ -18,6 +19,8 @@ export function processApiTiers(
modelQuality: number,
seasonalApiMultiplier: number,
ecosystem: DeveloperEcosystem,
apiPaidMetrics: TierServingMetrics,
apiFreeMetrics: TierServingMetrics,
): ApiTickResult {
const updated: ApiTierState = {
tiers: { ...tiers.tiers },
@@ -89,6 +92,23 @@ export function processApiTiers(
updated.totalDevelopers = totalDevelopers;
updated.totalTokensPerTick = totalTokens;
const freeRejectRate = apiFreeMetrics.demandTokens > 0
? apiFreeMetrics.rejectedTokens / apiFreeMetrics.demandTokens : 0;
if (freeRejectRate > 0) {
const extraChurn = updated.tiers.free.developerCount * freeRejectRate * 0.01 * REJECTION_CHURN_MULTIPLIER;
updated.tiers.free.developerCount = Math.max(0, updated.tiers.free.developerCount - extraChurn);
}
const paidRejectRate = apiPaidMetrics.demandTokens > 0
? apiPaidMetrics.rejectedTokens / apiPaidMetrics.demandTokens : 0;
if (paidRejectRate > 0) {
for (const id of API_TIER_ORDER) {
if (id === 'free') continue;
const extraChurn = updated.tiers[id].developerCount * paidRejectRate * 0.005 * REJECTION_CHURN_MULTIPLIER;
updated.tiers[id].developerCount = Math.max(0, updated.tiers[id].developerCount - extraChurn);
}
}
return {
apiTiers: updated,
apiRevenue: Math.max(0, apiRevenue),
@@ -1,12 +1,13 @@
import type { ConsumerTierState, ConsumerTierId } from '@ai-tycoon/shared';
import type { ConsumerTierState, ConsumerTierId, TierServingMetrics } from '@ai-tycoon/shared';
import {
CONSUMER_TIER_ORDER,
CONVERSION_RATES,
TIER_CHURN_RATES,
FREE_TIER_ADOPTION_RATE,
CONSUMER_TOKENS_PER_SUBSCRIBER,
OVERLOAD_PENALTY_EXPONENT,
NETWORK_DEGRADATION,
REJECTION_CHURN_MULTIPLIER,
QUEUE_CHURN_MULTIPLIER,
} from '@ai-tycoon/shared';
export interface ConsumerTickResult {
@@ -20,9 +21,9 @@ export function processConsumerTiers(
playerConsumerCustomers: number,
modelQuality: number,
seasonalConsumerMultiplier: number,
demandCapacityRatio: number,
networkLatencyPenalty: number,
overloadPolicy: { degradeQualityUnderLoad: boolean; prioritizeEnterprise: boolean },
consumerPaidMetrics: TierServingMetrics,
consumerFreeMetrics: TierServingMetrics,
): ConsumerTickResult {
const updated = {
tiers: { ...tiers.tiers },
@@ -97,26 +98,64 @@ export function processConsumerTiers(
updated.totalUsers = totalUsers;
const paidDemand = consumerPaidMetrics.demandTokens;
const freeDemand = consumerFreeMetrics.demandTokens;
const totalDemand = paidDemand + freeDemand;
let servingPenalty = 0;
if (totalDemand > 0) {
const totalRejected = consumerPaidMetrics.rejectedTokens + consumerFreeMetrics.rejectedTokens;
const totalQueued = consumerPaidMetrics.queuedTokens + consumerFreeMetrics.queuedTokens;
const rejectedFraction = totalRejected / totalDemand;
const queuedFraction = totalQueued / totalDemand;
servingPenalty = rejectedFraction * 1.5 + queuedFraction * 0.5;
const avgQuality = totalDemand > 0
? (consumerPaidMetrics.avgQualityDelivered * paidDemand + consumerFreeMetrics.avgQualityDelivered * freeDemand) / totalDemand
: modelQuality;
const qualityGap = Math.max(0, modelQuality - avgQuality);
servingPenalty += qualityGap * 0.8;
if (consumerFreeMetrics.rejectedTokens > 0 && freeDemand > 0) {
const freeRejectRate = consumerFreeMetrics.rejectedTokens / freeDemand;
const extraChurn = updated.tiers.free.userCount * freeRejectRate * 0.01 * REJECTION_CHURN_MULTIPLIER;
updated.tiers.free.userCount = Math.max(0, updated.tiers.free.userCount - extraChurn);
}
if (consumerPaidMetrics.rejectedTokens > 0 && paidDemand > 0) {
const paidRejectRate = consumerPaidMetrics.rejectedTokens / paidDemand;
for (const id of CONSUMER_TIER_ORDER) {
if (id === 'free') continue;
const extraChurn = updated.tiers[id].userCount * paidRejectRate * 0.005 * REJECTION_CHURN_MULTIPLIER;
updated.tiers[id].userCount = Math.max(0, updated.tiers[id].userCount - extraChurn);
}
}
if (totalQueued > 0) {
for (const id of CONSUMER_TIER_ORDER) {
const extraChurn = updated.tiers[id].userCount * queuedFraction * 0.002 * QUEUE_CHURN_MULTIPLIER;
updated.tiers[id].userCount = Math.max(0, updated.tiers[id].userCount - extraChurn);
}
}
}
let headroomBonus = 0;
let overloadPenalty = 0;
if (demandCapacityRatio <= 1) {
headroomBonus = (1 - demandCapacityRatio) * 0.2;
if (totalDemand > 0) {
const totalServed = consumerPaidMetrics.servedTokens + consumerFreeMetrics.servedTokens;
const servedFraction = totalServed / totalDemand;
if (servedFraction > 0.95) {
headroomBonus = (servedFraction - 0.95) * 4;
}
} else {
overloadPenalty = Math.min(1, Math.pow(demandCapacityRatio - 1, OVERLOAD_PENALTY_EXPONENT));
headroomBonus = 0.1;
}
const netLatencyPenalty = networkLatencyPenalty * NETWORK_DEGRADATION.satisfactionPenaltyPerLatency;
updated.satisfaction = Math.min(1, Math.max(0,
0.3 + modelQuality * 0.5 + headroomBonus - overloadPenalty - netLatencyPenalty,
0.3 + modelQuality * 0.5 + headroomBonus - servingPenalty - netLatencyPenalty,
));
if (overloadPolicy.degradeQualityUnderLoad && demandCapacityRatio > 0.85) {
updated.satisfaction = Math.max(0, updated.satisfaction - 0.02);
}
if (overloadPolicy.prioritizeEnterprise && demandCapacityRatio > 0.9) {
updated.satisfaction = Math.max(0, updated.satisfaction - 0.01);
}
updated.viralCoefficient = modelQuality > 0.5 ? 1 + (modelQuality - 0.5) * 2 : 0;
return {
@@ -5,6 +5,7 @@ import type {
EnterpriseSegment,
EnterprisePipelineStage,
DeveloperEcosystem,
TierServingMetrics,
} from '@ai-tycoon/shared';
import {
BASE_LEAD_RATE,
@@ -17,6 +18,7 @@ import {
ENTERPRISE_SLA_REQUIREMENTS,
ENTERPRISE_CAPABILITY_REQUIREMENTS,
ENTERPRISE_TOKENS_PER_TICK,
ENTERPRISE_REJECTION_SLA_MULTIPLIER,
} from '@ai-tycoon/shared';
import { ENTERPRISE_NAMES } from '../../data/enterpriseNames';
@@ -62,7 +64,7 @@ export function processEnterprisePipeline(
devEcosystem: DeveloperEcosystem,
seasonalEntMultiplier: number,
currentTick: number,
demandCapacityRatio: number,
enterpriseServingMetrics: TierServingMetrics,
): EnterprisePipelineResult {
const pipeline = [...ent.pipeline];
const activeContracts = [...ent.activeContracts];
@@ -129,7 +131,10 @@ export function processEnterprisePipeline(
if (lead.stage === 'qualification') {
transitionProb *= modelCapability >= lead.requiredCapability ? 1 : 0.1;
} else if (lead.stage === 'poc') {
transitionProb *= Math.max(0.2, 1 - Math.max(0, demandCapacityRatio - 0.9) * 5);
const entDemand = enterpriseServingMetrics.demandTokens;
const entRejected = enterpriseServingMetrics.rejectedTokens;
const rejectRate = entDemand > 0 ? entRejected / entDemand : 0;
transitionProb *= Math.max(0.2, 1 - rejectRate * 5);
} else if (lead.stage === 'negotiation') {
transitionProb *= Math.max(0.3, 1 - (lead.dealValue / 10_000_000) * 0.5);
}
@@ -181,14 +186,22 @@ export function processEnterprisePipeline(
const updated = { ...contract };
updated.totalTicks++;
if (demandCapacityRatio <= (1 / updated.slaUptime)) {
const entDemand = enterpriseServingMetrics.demandTokens;
const entServed = enterpriseServingMetrics.servedTokens;
const entRejected = enterpriseServingMetrics.rejectedTokens;
const servedFraction = entDemand > 0 ? entServed / entDemand : 1;
const wasRejected = entRejected > 0;
const qualityMet = enterpriseServingMetrics.avgQualityDelivered >= 0.85;
if (servedFraction >= updated.slaUptime && qualityMet && !wasRejected) {
updated.uptimeTicks++;
} else {
updated.slaViolations++;
const penalty = updated.pricePerMToken * (updated.tokensPerTick / 1_000_000) * SLA_PENALTY_FRACTION;
const severityMultiplier = wasRejected ? ENTERPRISE_REJECTION_SLA_MULTIPLIER : 1.0;
const penalty = updated.pricePerMToken * (updated.tokensPerTick / 1_000_000) * SLA_PENALTY_FRACTION * severityMultiplier;
slaPenalties += penalty;
updated.slaPenaltiesPaid += penalty;
updated.satisfaction = Math.max(0, updated.satisfaction - 0.005);
updated.satisfaction = Math.max(0, updated.satisfaction - (wasRejected ? 0.01 : 0.005));
}
if (updated.totalTicks > 0 && updated.slaViolations === 0) {
+106 -61
View File
@@ -1,5 +1,6 @@
import type { GameState, MarketState, BenchmarkResult, Competitor } from '@ai-tycoon/shared';
import { CONSUMER_TOKENS_PER_SUBSCRIBER } from '@ai-tycoon/shared';
import type { GameState, MarketState, BenchmarkResult } from '@ai-tycoon/shared';
import { CONSUMER_TOKENS_PER_SUBSCRIBER, API_TOKENS_PER_DEVELOPER_PER_TICK, BATCH_API_DEMAND_PER_DEV, makeInitialServingMetrics } from '@ai-tycoon/shared';
import type { TrafficPriority, TierServingMetrics } from '@ai-tycoon/shared';
import { BENCHMARKS } from '../../data/benchmarks';
import { computeSeasonal } from './seasonalSystem';
import { updateObsolescence } from './obsolescenceSystem';
@@ -9,6 +10,9 @@ import { processApiTiers } from './apiTierSystem';
import { processProductLines } from './productLines';
import { processDeveloperEcosystem } from './developerEcosystem';
import { processEnterprisePipeline } from './enterprisePipeline';
import { processServingPipeline } from './servingPipeline';
import type { DemandByTier } from './servingPipeline';
import type { ResearchBonuses } from '../researchBonuses';
export interface MarketTickResult {
marketState: MarketState;
@@ -44,24 +48,26 @@ function getSegmentQuality(
return weightedSum / totalWeight;
}
export function processMarketV2(state: GameState, currentTickCapacity: number): MarketTickResult {
export function processMarketV2(
state: GameState,
currentTickCapacity: number,
effectiveInferenceFlops?: number,
researchBonuses?: ResearchBonuses,
): MarketTickResult {
const consumerQuality = getSegmentQuality('consumer', state.models.benchmarkResults, state.models.bestDeployedModelScore);
const enterpriseQuality = getSegmentQuality('enterprise', state.models.benchmarkResults, state.models.bestDeployedModelScore);
const modelQuality = state.models.benchmarkResults.length > 0
? (consumerQuality + enterpriseQuality) / 2
: state.models.bestDeployedModelScore / 100;
// --- Seasonal ---
const seasonal = computeSeasonal(state.meta.tickCount);
// --- Obsolescence ---
const obsolescence = updateObsolescence(
state.market.obsolescence,
state.meta.currentEra,
state.meta.tickCount,
);
// --- Developer Ecosystem ---
const freeApiDevs = state.market.apiTiers.tiers.free.developerCount;
const totalApiDevs = state.market.apiTiers.totalDevelopers;
const engineeringCount = state.talent.departments.engineering.headcount;
@@ -75,7 +81,6 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
state.meta.currentEra,
);
// --- TAM & Market Shares ---
const chatProduct = state.models.productLines.find(p => p.type === 'chat-product');
const textApi = state.models.productLines.find(p => p.type === 'text-api');
@@ -106,32 +111,7 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
const playerDevCustomers = tam.segments.developer.shares.find(s => s.playerId === 'player')?.customers ?? 0;
const playerEntCustomers = tam.segments.enterprise.shares.find(s => s.playerId === 'player')?.customers ?? 0;
// --- Consumer Tiers ---
const consumerDemandEstimate = state.market.consumerTiers.totalUsers * CONSUMER_TOKENS_PER_SUBSCRIBER;
const demandCapacityRatio = currentTickCapacity > 0
? consumerDemandEstimate / currentTickCapacity
: consumerDemandEstimate > 0 ? 10 : 0;
const consumerResult = processConsumerTiers(
state.market.consumerTiers,
playerConsumerCustomers,
modelQuality,
seasonal.multipliers.consumer,
demandCapacityRatio,
state.infrastructure.networkLatencyPenalty,
state.market.overloadPolicy,
);
// --- API Tiers ---
const apiResult = processApiTiers(
state.market.apiTiers,
playerDevCustomers,
modelQuality,
seasonal.multipliers.api,
devEcosystem,
);
// --- Product Lines ---
// --- Product Lines (compute first to get token demand) ---
const productResult = processProductLines(
state.market.codeAssistant,
state.market.agentsPlatform,
@@ -142,22 +122,103 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
seasonal.multipliers.enterprise,
);
// --- Enterprise Pipeline ---
// --- Pre-compute demand estimates by tier for serving pipeline ---
const consumerTiers = state.market.consumerTiers;
const apiTiers = state.market.apiTiers;
const enterprise = state.market.enterprise;
const consumerPaidTokens = (consumerTiers.tiers.plus.userCount + consumerTiers.tiers.pro.userCount + consumerTiers.tiers.team.userCount) * CONSUMER_TOKENS_PER_SUBSCRIBER;
const consumerFreeTokens = consumerTiers.tiers.free.userCount * CONSUMER_TOKENS_PER_SUBSCRIBER;
const apiPaidTokens =
apiTiers.tiers.payg.developerCount * API_TOKENS_PER_DEVELOPER_PER_TICK.payg
+ apiTiers.tiers.scale.developerCount * API_TOKENS_PER_DEVELOPER_PER_TICK.scale
+ apiTiers.tiers['enterprise-api'].developerCount * API_TOKENS_PER_DEVELOPER_PER_TICK['enterprise-api']
+ productResult.codeAssistantTokenDemand;
const apiFreeTokens = apiTiers.tiers.free.developerCount * API_TOKENS_PER_DEVELOPER_PER_TICK.free;
let enterpriseTokens = 0;
for (const contract of enterprise.activeContracts) {
enterpriseTokens += contract.tokensPerTick;
}
enterpriseTokens += productResult.agentsPlatformTokenDemand;
const demandByTier: DemandByTier = {
'enterprise': enterpriseTokens,
'api-paid': apiPaidTokens,
'consumer-paid': consumerPaidTokens,
'api-free': apiFreeTokens,
'consumer-free': consumerFreeTokens,
};
// --- Batch API demand ---
let batchDemand = 0;
if (state.market.overloadPolicy.batchApiEnabled) {
for (const id of ['free', 'payg', 'scale', 'enterprise-api'] as const) {
batchDemand += apiTiers.tiers[id].developerCount * (BATCH_API_DEMAND_PER_DEV[id] ?? 0);
}
batchDemand *= Math.max(0.1, modelQuality);
}
const completedResearch = state.research?.completedResearch ?? [];
// --- Serving Pipeline ---
const servingResult = processServingPipeline({
modelsState: state.models,
effectiveInferenceFlops: effectiveInferenceFlops ?? currentTickCapacity,
overloadPolicy: state.market.overloadPolicy,
demandByTier,
batchApi: {
...state.market.batchApi,
totalBatchDemand: batchDemand,
},
modelQuality,
researchUnlocks: {
servingRoutingUnlocked: completedResearch.includes('request-routing'),
priorityQueuesUnlocked: completedResearch.includes('priority-queues'),
batchApiUnlocked: completedResearch.includes('request-batching'),
autoScalingBonus: completedResearch.includes('auto-scaling') ? 0.2 : 0,
},
});
const sm = servingResult.servingMetrics;
// --- Consumer Tiers (now with serving metrics) ---
const consumerResult = processConsumerTiers(
state.market.consumerTiers,
playerConsumerCustomers,
modelQuality,
seasonal.multipliers.consumer,
state.infrastructure.networkLatencyPenalty,
sm.tierMetrics['consumer-paid'],
sm.tierMetrics['consumer-free'],
);
// --- API Tiers (now with serving metrics) ---
const apiResult = processApiTiers(
state.market.apiTiers,
playerDevCustomers,
modelQuality,
seasonal.multipliers.api,
devEcosystem,
sm.tierMetrics['api-paid'],
sm.tierMetrics['api-free'],
);
// --- Enterprise Pipeline (now with serving metrics) ---
const salesDept = state.talent.departments.sales;
const salesHeadcount = salesDept.headcount;
const salesEffectiveness = salesDept.effectiveness;
const enterpriseResult = processEnterprisePipeline(
state.market.enterprise,
state.reputation.score,
state.models.bestDeployedModelScore,
state.models.bestDeployedSafetyScore,
salesHeadcount,
salesEffectiveness,
salesDept.headcount,
salesDept.effectiveness,
devEcosystem,
seasonal.multipliers.enterprise,
state.meta.tickCount,
demandCapacityRatio,
sm.tierMetrics['enterprise'],
);
// --- Aggregate revenue ---
@@ -165,9 +226,10 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
+ productResult.codeAssistantRevenue
+ productResult.agentsPlatformRevenue;
const apiRevenue = apiResult.apiRevenue
let apiRevenue = apiResult.apiRevenue
+ enterpriseResult.contractRevenue
- enterpriseResult.slaPenalties;
- enterpriseResult.slaPenalties
+ servingResult.batchRevenue;
const totalTokenDemand = consumerResult.totalConsumerTokenDemand
+ apiResult.totalApiTokenDemand
@@ -186,26 +248,7 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
const openSourceCount = state.market.openSourcedModels.length;
if (openSourceCount > 0) {
const revenueReduction = openSourceCount * 0.10 * 0.3;
const adjustedApiRevenue = apiRevenue * (1 - revenueReduction);
return {
marketState: {
...state.market,
tam,
consumerTiers: consumerResult.consumerTiers,
apiTiers: apiResult.apiTiers,
codeAssistant: productResult.codeAssistant,
agentsPlatform: productResult.agentsPlatform,
enterprise: enterpriseResult.enterprise,
developerEcosystem: devEcosystem,
seasonalPhase: seasonal.phase,
seasonalMultiplier: seasonal.multipliers.consumer,
obsolescence,
subscriberHistory,
},
apiRevenue: Math.max(0, adjustedApiRevenue),
subscriptionRevenue,
totalTokenDemand,
};
apiRevenue = apiRevenue * (1 - revenueReduction);
}
return {
@@ -221,6 +264,8 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
seasonalPhase: seasonal.phase,
seasonalMultiplier: seasonal.multipliers.consumer,
obsolescence,
servingMetrics: sm,
batchApi: servingResult.batchApi,
subscriberHistory,
},
apiRevenue: Math.max(0, apiRevenue),
@@ -0,0 +1,462 @@
import type {
OverloadPolicy,
TrafficPriority,
TierServingMetrics,
ServingMetrics,
ModelUtilizationEntry,
BatchApiState,
} from '@ai-tycoon/shared';
import type { BaseModel, ModelVariant, ModelFamily, ModelsState, SizeTier } from '@ai-tycoon/shared';
import {
MODEL_SIZE_THROUGHPUT_SCALER,
MOE_SPEED_MULTIPLIER,
FLOPS_TO_TOKENS_MULTIPLIER,
QUANTIZATION_CONFIGS,
REJECTION_SATISFACTION_PENALTY,
QUEUE_SATISFACTION_PENALTY,
DEGRADATION_SATISFACTION_PENALTY,
BASE_LATENCY_MS,
QUEUE_LATENCY_MS_PER_PERCENT,
BATCH_API_MAX_PENDING,
} from '@ai-tycoon/shared';
import { makeInitialServingMetrics } from '@ai-tycoon/shared';
/**
 * One deployed model (base model or variant) as seen by the serving pipeline.
 * Slots are produced by buildModelFleet and consumed when demand is routed.
 */
export interface ModelServingSlot {
/** Id of the base model, or of the variant when `isVariant` is true. */
modelId: string;
/** Display name of the base model or variant. */
modelName: string;
/** Size tier of the base model; variants report their base model's tier. */
sizeTier: SizeTier;
/** True when the slot was built from a model variant rather than a base model. */
isVariant: boolean;
/** Quantization of the variant, or null for base models and unquantized variants. */
quantization: string | null;
/** rawCapability / 100, multiplied by the quantization's quality retention for variants. */
qualityScore: number;
/** MoE speed factor times quantization speed factor (the size factor is not included). */
speedMultiplier: number;
/**
 * Capacity assigned to this slot: its equal share of inference FLOPs times
 * FLOPS_TO_TOKENS_MULTIPLIER and the size, MoE and quantization factors.
 */
throughputCapacity: number;
/** True when the model's (or variant's) architecture type is 'moe'. */
isMoE: boolean;
}
/**
 * Token demand for a single tick, keyed by traffic tier.
 * The keys are iterated as TrafficPriority values by processServingPipeline.
 */
export interface DemandByTier {
enterprise: number;
'api-paid': number;
'consumer-paid': number;
'api-free': number;
'consumer-free': number;
}
/** Everything processServingPipeline needs to simulate one tick of serving. */
export interface ServingPipelineInput {
/** Source of the deployed base models and variants that make up the fleet. */
modelsState: ModelsState;
/** Inference FLOPs available this tick; split evenly across all deployed models. */
effectiveInferenceFlops: number;
/** Player-configured routing, overflow, degradation, reservation and batch settings. */
overloadPolicy: OverloadPolicy;
/** Token demand per traffic tier for this tick. */
demandByTier: DemandByTier;
/** Batch API state; `pendingQueue` and `totalBatchDemand` are read when batching runs. */
batchApi: BatchApiState;
/** Fallback for the reported effective quality when no tokens are served. */
modelQuality: number;
/** Feature gates derived from completed research. */
researchUnlocks: {
/** When false, routing is forced to 'balanced' and per-tier rate limits are not applied. */
servingRoutingUnlocked: boolean;
/** When false, the built-in tier order is used instead of the policy's priorityOrder. */
priorityQueuesUnlocked: boolean;
/** Batch API only runs when this is true and the policy has batchApiEnabled set. */
batchApiUnlocked: boolean;
/** Fraction of each tier's rejected tokens that is converted back into served tokens. */
autoScalingBonus: number;
};
}
/** Outcome of one tick of the serving pipeline. */
export interface ServingPipelineResult {
/** Per-tier and aggregate serving statistics, including per-model utilization. */
servingMetrics: ServingMetrics;
/** Batch API state after this tick (pending queue, tokens served, revenue). */
batchApi: BatchApiState;
/** Revenue earned from batch tokens this tick; 0 when batching did not run. */
batchRevenue: number;
}
/**
 * Turns every deployed base model and deployed variant into a serving slot.
 *
 * Inference FLOPs are divided evenly between all deployed models; each slot's
 * throughput is that share scaled by the size-tier, MoE and (for variants)
 * quantization factors. Base-model slots come first, followed by variant
 * slots in family order. Variants whose base model cannot be found are
 * ignored.
 *
 * @param modelsState - Models state holding base models and families of variants.
 * @param effectiveInferenceFlops - Total inference FLOPs to share across the fleet.
 * @returns The fleet, or an empty array when nothing is deployed or there are no FLOPs.
 */
function buildModelFleet(
  modelsState: ModelsState,
  effectiveInferenceFlops: number,
): ModelServingSlot[] {
  const liveBases = modelsState.baseModels.filter(candidate => candidate.isDeployed);

  // Pair each deployed variant with its base model, dropping orphans.
  const liveVariants: { variant: ModelVariant; baseModel: BaseModel }[] = [];
  for (const family of modelsState.families) {
    for (const variant of family.variants) {
      if (!variant.isDeployed) continue;
      const parent = modelsState.baseModels.find(candidate => candidate.id === variant.baseModelId);
      if (parent) liveVariants.push({ variant, baseModel: parent });
    }
  }

  const deployedCount = liveBases.length + liveVariants.length;
  if (deployedCount === 0 || effectiveInferenceFlops <= 0) return [];

  // Every deployed model receives the same share of compute.
  const flopsShare = effectiveInferenceFlops / deployedCount;

  const baseSlots = liveBases.map((base): ModelServingSlot => {
    const moe = base.architecture.type === 'moe';
    const sizeScale = MODEL_SIZE_THROUGHPUT_SCALER[base.sizeTier] ?? 1.0;
    const moeScale = moe ? MOE_SPEED_MULTIPLIER : 1.0;
    return {
      modelId: base.id,
      modelName: base.name,
      sizeTier: base.sizeTier,
      isVariant: false,
      quantization: null,
      qualityScore: base.rawCapability / 100,
      speedMultiplier: moeScale,
      throughputCapacity: flopsShare * FLOPS_TO_TOKENS_MULTIPLIER * sizeScale * moeScale,
      isMoE: moe,
    };
  });

  const variantSlots = liveVariants.map(({ variant, baseModel }): ModelServingSlot => {
    const moe = variant.architecture.type === 'moe';
    const sizeScale = MODEL_SIZE_THROUGHPUT_SCALER[baseModel.sizeTier] ?? 1.0;
    const moeScale = moe ? MOE_SPEED_MULTIPLIER : 1.0;
    // Quantization trades quality for speed; unquantized variants keep both at 1.
    const quant = variant.quantization ? QUANTIZATION_CONFIGS[variant.quantization] : null;
    const quantSpeed = quant?.speedMultiplier ?? 1.0;
    const quantQuality = quant?.qualityRetention ?? 1.0;
    return {
      modelId: variant.id,
      modelName: variant.name,
      sizeTier: baseModel.sizeTier,
      isVariant: true,
      quantization: variant.quantization ?? null,
      qualityScore: (baseModel.rawCapability / 100) * quantQuality,
      speedMultiplier: moeScale * quantSpeed,
      throughputCapacity: flopsShare * FLOPS_TO_TOKENS_MULTIPLIER * sizeScale * moeScale * quantSpeed,
      isMoE: moe,
    };
  });

  return [...baseSlots, ...variantSlots];
}
/**
 * Returns a copy of the fleet ordered for the given routing strategy.
 *
 * - 'quality-first': highest qualityScore first.
 * - 'speed-first': highest throughputCapacity first.
 * - anything else (including 'balanced'): throughput order when utilization
 *   is above 0.8, quality order otherwise.
 *
 * The input array is never mutated.
 *
 * @param fleet - Slots to order.
 * @param strategy - Routing strategy name.
 * @param overallUtilization - Total demand divided by total fleet capacity.
 * @returns A new, sorted array of the same slots.
 */
function sortFleetByStrategy(
  fleet: ModelServingSlot[],
  strategy: string,
  overallUtilization: number,
): ModelServingSlot[] {
  const byQualityDesc = (a: ModelServingSlot, b: ModelServingSlot): number =>
    b.qualityScore - a.qualityScore;
  const byThroughputDesc = (a: ModelServingSlot, b: ModelServingSlot): number =>
    b.throughputCapacity - a.throughputCapacity;

  let comparator = byQualityDesc;
  if (strategy === 'speed-first') {
    comparator = byThroughputDesc;
  } else if (strategy !== 'quality-first' && overallUtilization > 0.8) {
    // Balanced routing favours speed only once the fleet is under pressure.
    comparator = byThroughputDesc;
  }

  return fleet.slice().sort(comparator);
}
/** Mutable per-tick capacity bookkeeping for the fleet, keyed by slot modelId. */
interface FleetState {
/** Capacity of each slot that has not been handed out yet. */
remaining: Map<string, number>;
/** Capacity of each slot that has already been consumed by served tokens. */
used: Map<string, number>;
}
/**
 * Serves one tier's demand from the fleet, mutating `fleetState` as capacity
 * is consumed.
 *
 * Slots are tried in the order given. A slot whose quality is below 95% of the
 * best slot counts as "degraded": it is only used in the first pass when
 * auto-degradation is enabled, utilization exceeds the trigger threshold, and
 * the slot is not below the policy's minimum quality floor. Demand left over
 * after the first pass is handled by the tier's overflow behaviour:
 * 'queue' queues it, 'reject' rejects it, and 'degrade' takes whatever
 * capacity is left on any slot (counted as degraded) and rejects the rest.
 *
 * @param demand - Tokens requested by the tier.
 * @param fleet - Slots in routing order.
 * @param fleetState - Remaining/used capacity per slot; updated in place.
 * @param policy - Overload policy (auto-degradation and overflow settings).
 * @param tier - Tier being served; selects the overflow behaviour.
 * @param overallUtilization - Total demand divided by total fleet capacity.
 * @returns Served/queued/rejected/degraded token counts and the
 *   token-weighted average quality (best fleet quality when nothing is served).
 */
function serveFromFleet(
  demand: number,
  fleet: ModelServingSlot[],
  fleetState: FleetState,
  policy: OverloadPolicy,
  tier: TrafficPriority,
  overallUtilization: number,
): TierServingMetrics {
  if (demand <= 0) {
    return {
      demandTokens: 0,
      servedTokens: 0,
      queuedTokens: 0,
      rejectedTokens: 0,
      degradedTokens: 0,
      avgQualityDelivered: 1,
    };
  }

  const topQuality = fleet.length > 0 ? Math.max(...fleet.map(s => s.qualityScore)) : 1;
  const { autoDegradation } = policy;
  const mayDegrade = autoDegradation.enabled && overallUtilization > autoDegradation.triggerThreshold;

  let unmet = demand;
  let servedTokens = 0;
  let degradedTokens = 0;
  let qualityMass = 0;

  // Takes as much of the unmet demand as the slot can still supply and books it.
  const drawFrom = (slot: ModelServingSlot, countsAsDegraded: boolean): void => {
    const free = fleetState.remaining.get(slot.modelId) ?? 0;
    if (free <= 0) return;
    const taken = Math.min(unmet, free);
    fleetState.remaining.set(slot.modelId, free - taken);
    fleetState.used.set(slot.modelId, (fleetState.used.get(slot.modelId) ?? 0) + taken);
    servedTokens += taken;
    if (countsAsDegraded) degradedTokens += taken;
    qualityMass += taken * slot.qualityScore;
    unmet -= taken;
  };

  // First pass: top-quality slots always, lower-quality slots only when allowed.
  for (const slot of fleet) {
    if (unmet <= 0) break;
    const belowBest = slot.qualityScore < topQuality * 0.95;
    if (belowBest && (!mayDegrade || slot.qualityScore < autoDegradation.minQualityFloor)) continue;
    drawFrom(slot, belowBest);
  }

  let queuedTokens = 0;
  let rejectedTokens = 0;
  if (unmet > 0) {
    const overflow = policy.overflowBehavior[tier];
    if (overflow === 'queue') {
      queuedTokens = unmet;
    } else if (overflow === 'reject') {
      rejectedTokens = unmet;
    } else if (overflow === 'degrade') {
      // Second pass ignores the quality gates; anything still unmet is rejected.
      for (const slot of fleet) {
        if (unmet <= 0) break;
        drawFrom(slot, true);
      }
      rejectedTokens = unmet;
    }
  }

  return {
    demandTokens: demand,
    servedTokens,
    queuedTokens,
    rejectedTokens,
    degradedTokens,
    avgQualityDelivered: servedTokens > 0 ? qualityMass / servedTokens : topQuality,
  };
}
/** Metrics for a tier that had no demand this tick (quality is reported as 1). */
function makeIdleTierMetrics(): TierServingMetrics {
  return {
    demandTokens: 0,
    servedTokens: 0,
    queuedTokens: 0,
    rejectedTokens: 0,
    degradedTokens: 0,
    avgQualityDelivered: 1,
  };
}

/** Moves `boost` (a fraction) of each tier's rejected tokens into served tokens, in place. */
function recoverRejectedWithAutoScaling(
  tierResults: Record<TrafficPriority, TierServingMetrics>,
  boost: number,
): void {
  if (boost <= 0) return;
  for (const tier of Object.keys(tierResults) as TrafficPriority[]) {
    const metrics = tierResults[tier];
    if (metrics.rejectedTokens <= 0) continue;
    const recovered = Math.min(metrics.rejectedTokens, metrics.rejectedTokens * boost);
    tierResults[tier] = {
      ...metrics,
      servedTokens: metrics.servedTokens + recovered,
      rejectedTokens: metrics.rejectedTokens - recovered,
    };
  }
}

/**
 * Simulates one tick of request serving.
 *
 * Steps:
 * 1. Build the fleet from deployed models; with no fleet, all demand is rejected.
 * 2. Order the fleet by routing strategy ('balanced' until routing research is done).
 * 3. Serve enterprise demand first from its own capacity budget, then hand
 *    whatever capacity enterprise did not use to the remaining tiers in
 *    priority order.
 * 4. Fill idle capacity with Batch API work when enabled and unlocked.
 * 5. Apply the auto-scaling bonus, then compute aggregate metrics so that the
 *    totals always agree with the per-tier figures.
 *
 * Capacity available to non-enterprise tiers is always `slot capacity minus
 * enterprise usage`; an enterprise reservation never creates capacity beyond
 * what the fleet actually has.
 *
 * @param input - Fleet source, policy, per-tier demand, batch state and research gates.
 * @returns Serving metrics, the updated Batch API state and batch revenue for the tick.
 */
export function processServingPipeline(input: ServingPipelineInput): ServingPipelineResult {
  const { modelsState, effectiveInferenceFlops, overloadPolicy, demandByTier, batchApi, modelQuality, researchUnlocks } = input;

  const fleet = buildModelFleet(modelsState, effectiveInferenceFlops);
  const totalFleetCapacity = fleet.reduce((sum, s) => sum + s.throughputCapacity, 0);

  // Nothing deployed, or no compute: every token of demand is rejected.
  if (fleet.length === 0 || totalFleetCapacity <= 0) {
    const metrics = makeInitialServingMetrics();
    for (const tier of Object.keys(demandByTier) as TrafficPriority[]) {
      const demand = demandByTier[tier] ?? 0;
      if (demand > 0) {
        metrics.tierMetrics[tier] = {
          demandTokens: demand,
          servedTokens: 0,
          queuedTokens: 0,
          rejectedTokens: demand,
          degradedTokens: 0,
          avgQualityDelivered: 0,
        };
        metrics.totalRejected += demand;
      }
    }
    return {
      servingMetrics: metrics,
      batchApi: { ...batchApi, servedLastTick: 0, revenue: 0 },
      batchRevenue: 0,
    };
  }

  const totalDemand = Object.values(demandByTier).reduce((s, v) => s + v, 0);
  // totalFleetCapacity is known to be positive past the guard above.
  const overallUtilization = totalDemand / totalFleetCapacity;

  const effectiveStrategy = researchUnlocks.servingRoutingUnlocked
    ? overloadPolicy.routingStrategy
    : 'balanced';
  const sortedFleet = sortFleetByStrategy(fleet, effectiveStrategy, overallUtilization);

  // Shared pool for non-enterprise tiers; starts at full capacity and is
  // reduced by enterprise usage below.
  const fleetState: FleetState = {
    remaining: new Map(fleet.map(s => [s.modelId, s.throughputCapacity])),
    used: new Map(fleet.map(s => [s.modelId, 0])),
  };

  const reservedCapacity = totalFleetCapacity * overloadPolicy.enterpriseReservation;
  const enterpriseDemand = demandByTier['enterprise'] ?? 0;

  const effectivePriorityOrder = researchUnlocks.priorityQueuesUnlocked
    ? overloadPolicy.priorityOrder
    : ['enterprise', 'api-paid', 'consumer-paid', 'api-free', 'consumer-free'] as TrafficPriority[];

  const tierResults: Record<TrafficPriority, TierServingMetrics> = {} as Record<TrafficPriority, TierServingMetrics>;

  // Enterprise is always served first, regardless of where it sits in the order.
  const nonEnterpriseTiers = effectivePriorityOrder.filter(t => t !== 'enterprise');

  if (enterpriseDemand > 0) {
    const enterpriseFleetState: FleetState = {
      remaining: new Map(fleet.map(s => [s.modelId, s.throughputCapacity])),
      used: new Map(fleet.map(s => [s.modelId, 0])),
    };

    // Enterprise draws from the reserved budget when one is set, otherwise
    // from the whole fleet.
    // NOTE(review): once the budget is exhausted the loop stops, so slots
    // later in routing order keep their full capacity for enterprise. If the
    // reservation is meant to be a hard cap those slots should be set to 0;
    // behaviour is left unchanged here pending a design decision.
    let budgetLeft = reservedCapacity > 0 ? reservedCapacity : totalFleetCapacity;
    for (const slot of sortedFleet) {
      const alloc = Math.min(slot.throughputCapacity, budgetLeft);
      enterpriseFleetState.remaining.set(slot.modelId, alloc);
      budgetLeft -= alloc;
      if (budgetLeft <= 0) break;
    }

    const effectiveEntDemand = researchUnlocks.servingRoutingUnlocked
      ? Math.min(enterpriseDemand, overloadPolicy.rateLimitPerCustomer['enterprise'] * 100)
      : enterpriseDemand;

    tierResults['enterprise'] = serveFromFleet(
      effectiveEntDemand, sortedFleet, enterpriseFleetState, overloadPolicy, 'enterprise', overallUtilization,
    );

    // Whatever enterprise did not consume (including unused reservation) is
    // released to the other tiers. Computing this from the slot's real
    // capacity keeps every slot at or below its own throughput.
    for (const slot of fleet) {
      const entUsed = enterpriseFleetState.used.get(slot.modelId) ?? 0;
      fleetState.remaining.set(slot.modelId, Math.max(0, slot.throughputCapacity - entUsed));
      fleetState.used.set(slot.modelId, entUsed);
    }
  } else {
    // No enterprise traffic: nothing was reserved, so the shared pool is
    // already the full fleet and must not be topped up.
    tierResults['enterprise'] = makeIdleTierMetrics();
  }

  for (const tier of nonEnterpriseTiers) {
    const rawDemand = demandByTier[tier] ?? 0;
    const effectiveDemand = researchUnlocks.servingRoutingUnlocked
      ? Math.min(rawDemand, overloadPolicy.rateLimitPerCustomer[tier] * 100)
      : rawDemand;

    tierResults[tier] = serveFromFleet(
      effectiveDemand, sortedFleet, fleetState, overloadPolicy, tier, overallUtilization,
    );
  }

  for (const tier of effectivePriorityOrder) {
    if (!(tier in tierResults)) {
      tierResults[tier] = makeIdleTierMetrics();
    }
  }

  // Batch API: soak up capacity that interactive traffic left idle.
  let batchTokensServed = 0;
  let batchRevenue = 0;
  const updatedBatchApi = { ...batchApi };

  if (overloadPolicy.batchApiEnabled && researchUnlocks.batchApiUnlocked) {
    let idleCapacity = 0;
    for (const slot of fleet) {
      idleCapacity += fleetState.remaining.get(slot.modelId) ?? 0;
    }

    const pendingBatch = Math.min(batchApi.pendingQueue + batchApi.totalBatchDemand, BATCH_API_MAX_PENDING);
    batchTokensServed = Math.min(pendingBatch, idleCapacity);

    // Price per million tokens before the batch discount is applied.
    const baseTokenPrice = 3.0;
    batchRevenue = (batchTokensServed / 1_000_000) * baseTokenPrice * (1 - overloadPolicy.batchApiDiscount);

    updatedBatchApi.pendingQueue = Math.max(0, pendingBatch - batchTokensServed);
    updatedBatchApi.servedLastTick = batchTokensServed;
    updatedBatchApi.revenue = batchRevenue;
  }

  // Auto-scaling must run before the aggregates are taken, otherwise the
  // totals would not match the per-tier metrics returned alongside them.
  recoverRejectedWithAutoScaling(tierResults, researchUnlocks.autoScalingBonus);

  const allTiers = Object.values(tierResults);
  const totalServed = allTiers.reduce((s, t) => s + t.servedTokens, 0);
  const totalQueued = allTiers.reduce((s, t) => s + t.queuedTokens, 0);
  const totalRejected = allTiers.reduce((s, t) => s + t.rejectedTokens, 0);
  const totalDegraded = allTiers.reduce((s, t) => s + t.degradedTokens, 0);

  // Served-token-weighted quality; falls back to modelQuality when idle.
  let effectiveQuality = modelQuality;
  if (totalServed > 0) {
    let qualitySum = 0;
    for (const t of allTiers) {
      qualitySum += t.avgQualityDelivered * t.servedTokens;
    }
    effectiveQuality = qualitySum / totalServed;
  }

  const queuedFraction = totalDemand > 0 ? totalQueued / totalDemand : 0;
  const avgLatencyMs = BASE_LATENCY_MS + queuedFraction * 100 * QUEUE_LATENCY_MS_PER_PERCENT;

  const modelUtilization: ModelUtilizationEntry[] = fleet.map(slot => {
    const throughputUsed = fleetState.used.get(slot.modelId) ?? 0;
    return {
      modelId: slot.modelId,
      modelName: slot.modelName,
      quantization: slot.quantization,
      qualityScore: slot.qualityScore,
      throughputCapacity: slot.throughputCapacity,
      throughputUsed,
      utilization: slot.throughputCapacity > 0
        ? Math.min(1, throughputUsed / slot.throughputCapacity)
        : 0,
    };
  });

  return {
    servingMetrics: {
      tierMetrics: tierResults,
      totalServed,
      totalQueued,
      totalRejected,
      totalDegraded,
      effectiveQuality,
      avgLatencyMs,
      modelUtilization,
      batchApiTokensServed: batchTokensServed,
      batchApiRevenue: batchRevenue,
    },
    batchApi: updatedBatchApi,
    batchRevenue,
  };
}
/**
 * Converts a tier's serving metrics into a satisfaction delta.
 *
 * The result is zero or negative: rejected and queued shares of demand and
 * the degraded share of served tokens (weighted by how far delivered quality
 * fell below 1) are each scaled by their penalty constant and by 10, summed,
 * and negated.
 *
 * @param metrics - Serving metrics for a single tier.
 * @returns 0 when the tier had no demand, otherwise the negated penalty sum.
 */
export function computeSatisfactionImpact(
  metrics: TierServingMetrics,
): number {
  const { demandTokens, servedTokens, rejectedTokens, queuedTokens, degradedTokens, avgQualityDelivered } = metrics;
  if (demandTokens <= 0) return 0;

  const shareRejected = rejectedTokens / demandTokens;
  const shareQueued = queuedTokens / demandTokens;
  // Degradation is measured against what was actually served, not demand.
  let shareDegraded = 0;
  if (servedTokens > 0) {
    shareDegraded = degradedTokens / servedTokens;
  }

  const fromRejections = shareRejected * REJECTION_SATISFACTION_PENALTY * 10;
  const fromQueueing = shareQueued * QUEUE_SATISFACTION_PENALTY * 10;
  const fromDegradation = shareDegraded * (1 - avgQualityDelivered) * DEGRADATION_SATISFACTION_PENALTY * 10;

  return -(fromRejections + fromQueueing + fromDegradation);
}
@@ -1,8 +1,9 @@
import type { GameState } from '@ai-tycoon/shared';
import { processMarketV2 } from './market/index';
import type { ResearchBonuses } from './researchBonuses';
export type { MarketTickResult } from './market/index';
export function processMarket(state: GameState, currentTickCapacity: number) {
return processMarketV2(state, currentTickCapacity);
export function processMarket(state: GameState, currentTickCapacity: number, effectiveInferenceFlops?: number, researchBonuses?: ResearchBonuses) {
return processMarketV2(state, currentTickCapacity, effectiveInferenceFlops, researchBonuses);
}
@@ -18,6 +18,7 @@ export interface ResearchBonuses {
reputationBonus: number;
safetyBonus: number;
autoScalingBonus: number;
}
export function getResearchBonuses(completedResearch: string[]): ResearchBonuses {
@@ -37,6 +38,7 @@ export function getResearchBonuses(completedResearch: string[]): ResearchBonuses
agentsBonus: 0,
reputationBonus: 0,
safetyBonus: 0,
autoScalingBonus: 0,
};
for (const id of completedResearch) {
@@ -53,6 +55,7 @@ export function getResearchBonuses(completedResearch: string[]): ResearchBonuses
case 'pipeline_speed': bonuses.pipelineSpeedBonus += effect.value; break;
case 'data_quality': bonuses.dataQualityBonus += effect.value; break;
case 'sdk_coverage': bonuses.sdkCoverageBonus += effect.value; break;
case 'auto_scaling': bonuses.autoScalingBonus += effect.value; break;
}
break;
case 'capability_boost':