Replace decorative overload policy with real serving pipeline and dedicated Serving page

The old overload policy had dead controls (maxQueueDepth and rateLimitPerCustomer were never read) and trivial flat penalties. This commit replaces it with a full serving pipeline in which deployed models form a fleet, requests route through priority/degradation logic, and policy choices create meaningful strategic tradeoffs. The new serving pipeline consists of: fleet building from deployed models (size/quant/MoE multipliers), demand categorization into 5 priority tiers, enterprise capacity reservation, priority-ordered serving with overflow behaviors (queue/reject/degrade), auto-degradation to faster models under load, and a Batch API that fills idle capacity at discounted rates. Four new research nodes gate features progressively: Intelligent Request Routing, Priority Queue System, Request Batching, and Auto-Scaling. Also adds a new dedicated Serving page with pipeline metrics, model fleet utilization, and research-gated policy controls.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-25 12:42:09 -04:00
parent d7d77238b9
commit 901db02a6b
17 changed files with 1349 additions and 229 deletions
@@ -1,12 +1,13 @@
-import type { ConsumerTierState, ConsumerTierId } from '@ai-tycoon/shared';
+import type { ConsumerTierState, ConsumerTierId, TierServingMetrics } from '@ai-tycoon/shared';
 import {
  CONSUMER_TIER_ORDER,
  CONVERSION_RATES,
  TIER_CHURN_RATES,
  FREE_TIER_ADOPTION_RATE,
  CONSUMER_TOKENS_PER_SUBSCRIBER,
-  OVERLOAD_PENALTY_EXPONENT,
  NETWORK_DEGRADATION,
+  REJECTION_CHURN_MULTIPLIER,
+  QUEUE_CHURN_MULTIPLIER,
 } from '@ai-tycoon/shared';

 export interface ConsumerTickResult {
@@ -20,9 +21,9 @@ export function processConsumerTiers(
  playerConsumerCustomers: number,
  modelQuality: number,
  seasonalConsumerMultiplier: number,
-  demandCapacityRatio: number,
  networkLatencyPenalty: number,
-  overloadPolicy: { degradeQualityUnderLoad: boolean; prioritizeEnterprise: boolean },
+  consumerPaidMetrics: TierServingMetrics,
+  consumerFreeMetrics: TierServingMetrics,
 ): ConsumerTickResult {
  const updated = {
    tiers: { ...tiers.tiers },
@@ -97,26 +98,64 @@ export function processConsumerTiers(

  updated.totalUsers = totalUsers;

+  const paidDemand = consumerPaidMetrics.demandTokens;
+  const freeDemand = consumerFreeMetrics.demandTokens;
+  const totalDemand = paidDemand + freeDemand;
+
+  let servingPenalty = 0;
+  if (totalDemand > 0) {
+    const totalRejected = consumerPaidMetrics.rejectedTokens + consumerFreeMetrics.rejectedTokens;
+    const totalQueued = consumerPaidMetrics.queuedTokens + consumerFreeMetrics.queuedTokens;
+    const rejectedFraction = totalRejected / totalDemand;
+    const queuedFraction = totalQueued / totalDemand;
+
+    servingPenalty = rejectedFraction * 1.5 + queuedFraction * 0.5;
+
+    const avgQuality = totalDemand > 0
+      ? (consumerPaidMetrics.avgQualityDelivered * paidDemand + consumerFreeMetrics.avgQualityDelivered * freeDemand) / totalDemand
+      : modelQuality;
+    const qualityGap = Math.max(0, modelQuality - avgQuality);
+    servingPenalty += qualityGap * 0.8;
+
+    if (consumerFreeMetrics.rejectedTokens > 0 && freeDemand > 0) {
+      const freeRejectRate = consumerFreeMetrics.rejectedTokens / freeDemand;
+      const extraChurn = updated.tiers.free.userCount * freeRejectRate * 0.01 * REJECTION_CHURN_MULTIPLIER;
+      updated.tiers.free.userCount = Math.max(0, updated.tiers.free.userCount - extraChurn);
+    }
+
+    if (consumerPaidMetrics.rejectedTokens > 0 && paidDemand > 0) {
+      const paidRejectRate = consumerPaidMetrics.rejectedTokens / paidDemand;
+      for (const id of CONSUMER_TIER_ORDER) {
+        if (id === 'free') continue;
+        const extraChurn = updated.tiers[id].userCount * paidRejectRate * 0.005 * REJECTION_CHURN_MULTIPLIER;
+        updated.tiers[id].userCount = Math.max(0, updated.tiers[id].userCount - extraChurn);
+      }
+    }
+
+    if (totalQueued > 0) {
+      for (const id of CONSUMER_TIER_ORDER) {
+        const extraChurn = updated.tiers[id].userCount * queuedFraction * 0.002 * QUEUE_CHURN_MULTIPLIER;
+        updated.tiers[id].userCount = Math.max(0, updated.tiers[id].userCount - extraChurn);
+      }
+    }
+  }
+
  let headroomBonus = 0;
-  let overloadPenalty = 0;
-  if (demandCapacityRatio <= 1) {
-    headroomBonus = (1 - demandCapacityRatio) * 0.2;
+  if (totalDemand > 0) {
+    const totalServed = consumerPaidMetrics.servedTokens + consumerFreeMetrics.servedTokens;
+    const servedFraction = totalServed / totalDemand;
+    if (servedFraction > 0.95) {
+      headroomBonus = (servedFraction - 0.95) * 4;
+    }
  } else {
-    overloadPenalty = Math.min(1, Math.pow(demandCapacityRatio - 1, OVERLOAD_PENALTY_EXPONENT));
+    headroomBonus = 0.1;
  }

  const netLatencyPenalty = networkLatencyPenalty * NETWORK_DEGRADATION.satisfactionPenaltyPerLatency;
  updated.satisfaction = Math.min(1, Math.max(0,
-    0.3 + modelQuality * 0.5 + headroomBonus - overloadPenalty - netLatencyPenalty,
+    0.3 + modelQuality * 0.5 + headroomBonus - servingPenalty - netLatencyPenalty,
  ));

-  if (overloadPolicy.degradeQualityUnderLoad && demandCapacityRatio > 0.85) {
-    updated.satisfaction = Math.max(0, updated.satisfaction - 0.02);
-  }
-  if (overloadPolicy.prioritizeEnterprise && demandCapacityRatio > 0.9) {
-    updated.satisfaction = Math.max(0, updated.satisfaction - 0.01);
-  }
-
  updated.viralCoefficient = modelQuality > 0.5 ? 1 + (modelQuality - 0.5) * 2 : 0;

  return {