Replace decorative overload policy with real serving pipeline and dedicated Serving page
CI / build-and-push (push) Successful in 28s

The old overload policy had dead controls (maxQueueDepth and rateLimitPerCustomer were
never read) and trivial flat penalties. This commit replaces it with a full serving pipeline
in which deployed models form a fleet, requests route through priority/degradation logic,
and policy choices create meaningful strategic tradeoffs.

New serving pipeline: fleet building from deployed models (size/quant/MoE multipliers),
demand categorization by 5 priority tiers, enterprise capacity reservation, priority-ordered
serving with overflow behaviors (queue/reject/degrade), auto-degradation to faster models
under load, and Batch API to fill idle capacity at discounted rates.

Four new research nodes gate features progressively: Intelligent Request Routing, Priority
Queue System, Request Batching, and Auto-Scaling. A new dedicated Serving page shows pipeline
metrics, model fleet utilization, and research-gated policy controls.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 12:42:09 -04:00
parent d7d77238b9
commit 901db02a6b
17 changed files with 1349 additions and 229 deletions
@@ -1,12 +1,13 @@
import type { ConsumerTierState, ConsumerTierId } from '@ai-tycoon/shared';
import type { ConsumerTierState, ConsumerTierId, TierServingMetrics } from '@ai-tycoon/shared';
import {
CONSUMER_TIER_ORDER,
CONVERSION_RATES,
TIER_CHURN_RATES,
FREE_TIER_ADOPTION_RATE,
CONSUMER_TOKENS_PER_SUBSCRIBER,
OVERLOAD_PENALTY_EXPONENT,
NETWORK_DEGRADATION,
REJECTION_CHURN_MULTIPLIER,
QUEUE_CHURN_MULTIPLIER,
} from '@ai-tycoon/shared';
export interface ConsumerTickResult {
@@ -20,9 +21,9 @@ export function processConsumerTiers(
playerConsumerCustomers: number,
modelQuality: number,
seasonalConsumerMultiplier: number,
demandCapacityRatio: number,
networkLatencyPenalty: number,
overloadPolicy: { degradeQualityUnderLoad: boolean; prioritizeEnterprise: boolean },
consumerPaidMetrics: TierServingMetrics,
consumerFreeMetrics: TierServingMetrics,
): ConsumerTickResult {
const updated = {
tiers: { ...tiers.tiers },
@@ -97,26 +98,64 @@ export function processConsumerTiers(
updated.totalUsers = totalUsers;
const paidDemand = consumerPaidMetrics.demandTokens;
const freeDemand = consumerFreeMetrics.demandTokens;
const totalDemand = paidDemand + freeDemand;
let servingPenalty = 0;
if (totalDemand > 0) {
const totalRejected = consumerPaidMetrics.rejectedTokens + consumerFreeMetrics.rejectedTokens;
const totalQueued = consumerPaidMetrics.queuedTokens + consumerFreeMetrics.queuedTokens;
const rejectedFraction = totalRejected / totalDemand;
const queuedFraction = totalQueued / totalDemand;
servingPenalty = rejectedFraction * 1.5 + queuedFraction * 0.5;
const avgQuality = totalDemand > 0
? (consumerPaidMetrics.avgQualityDelivered * paidDemand + consumerFreeMetrics.avgQualityDelivered * freeDemand) / totalDemand
: modelQuality;
const qualityGap = Math.max(0, modelQuality - avgQuality);
servingPenalty += qualityGap * 0.8;
if (consumerFreeMetrics.rejectedTokens > 0 && freeDemand > 0) {
const freeRejectRate = consumerFreeMetrics.rejectedTokens / freeDemand;
const extraChurn = updated.tiers.free.userCount * freeRejectRate * 0.01 * REJECTION_CHURN_MULTIPLIER;
updated.tiers.free.userCount = Math.max(0, updated.tiers.free.userCount - extraChurn);
}
if (consumerPaidMetrics.rejectedTokens > 0 && paidDemand > 0) {
const paidRejectRate = consumerPaidMetrics.rejectedTokens / paidDemand;
for (const id of CONSUMER_TIER_ORDER) {
if (id === 'free') continue;
const extraChurn = updated.tiers[id].userCount * paidRejectRate * 0.005 * REJECTION_CHURN_MULTIPLIER;
updated.tiers[id].userCount = Math.max(0, updated.tiers[id].userCount - extraChurn);
}
}
if (totalQueued > 0) {
for (const id of CONSUMER_TIER_ORDER) {
const extraChurn = updated.tiers[id].userCount * queuedFraction * 0.002 * QUEUE_CHURN_MULTIPLIER;
updated.tiers[id].userCount = Math.max(0, updated.tiers[id].userCount - extraChurn);
}
}
}
let headroomBonus = 0;
let overloadPenalty = 0;
if (demandCapacityRatio <= 1) {
headroomBonus = (1 - demandCapacityRatio) * 0.2;
if (totalDemand > 0) {
const totalServed = consumerPaidMetrics.servedTokens + consumerFreeMetrics.servedTokens;
const servedFraction = totalServed / totalDemand;
if (servedFraction > 0.95) {
headroomBonus = (servedFraction - 0.95) * 4;
}
} else {
overloadPenalty = Math.min(1, Math.pow(demandCapacityRatio - 1, OVERLOAD_PENALTY_EXPONENT));
headroomBonus = 0.1;
}
const netLatencyPenalty = networkLatencyPenalty * NETWORK_DEGRADATION.satisfactionPenaltyPerLatency;
updated.satisfaction = Math.min(1, Math.max(0,
0.3 + modelQuality * 0.5 + headroomBonus - overloadPenalty - netLatencyPenalty,
0.3 + modelQuality * 0.5 + headroomBonus - servingPenalty - netLatencyPenalty,
));
if (overloadPolicy.degradeQualityUnderLoad && demandCapacityRatio > 0.85) {
updated.satisfaction = Math.max(0, updated.satisfaction - 0.02);
}
if (overloadPolicy.prioritizeEnterprise && demandCapacityRatio > 0.9) {
updated.satisfaction = Math.max(0, updated.satisfaction - 0.01);
}
updated.viralCoefficient = modelQuality > 0.5 ? 1 + (modelQuality - 0.5) * 2 : 0;
return {