Replace decorative overload policy with real serving pipeline and dedicated Serving page

The old overload policy had dead controls (maxQueueDepth and rateLimitPerCustomer were never read) and trivial flat penalties. This commit replaces it with a full serving pipeline in which deployed models form a fleet, requests route through priority/degradation logic, and policy choices create meaningful strategic tradeoffs.

New serving pipeline: fleet building from deployed models (size/quant/MoE multipliers), demand categorization into 5 priority tiers, enterprise capacity reservation, priority-ordered serving with overflow behaviors (queue/reject/degrade), auto-degradation to faster models under load, and a Batch API that fills idle capacity at discounted rates.

Four new research nodes gate features progressively: Intelligent Request Routing, Priority Queue System, Request Batching, and Auto-Scaling.

New dedicated Serving page with pipeline metrics, model fleet utilization, and research-gated policy controls.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-25 12:42:09 -04:00
parent d7d77238b9
commit 901db02a6b
17 changed files with 1349 additions and 229 deletions
@@ -17,6 +17,7 @@ import { DataPage } from '@/pages/DataPage';
 import { CompetitorsPage } from '@/pages/CompetitorsPage';
 import { AchievementsPage } from '@/pages/AchievementsPage';
 import { LeaderboardPage } from '@/pages/LeaderboardPage';
+import { ServingPage } from '@/pages/ServingPage';

 export function MainLayout() {
  const { subPath, setSubPath } = useHashRouter();
@@ -45,6 +46,7 @@ function PageRouter({ page, subPath, setSubPath }: { page: string; subPath: stri
    case 'research': return <ResearchPage />;
    case 'models': return <ModelsPage />;
    case 'market': return <MarketPage initialTab={subPath} onTabChange={setSubPath} />;
+    case 'serving': return <ServingPage />;
    case 'finance': return <FinancePage />;
    case 'talent': return <TalentPage />;
    case 'data': return <DataPage />;
@@ -1,7 +1,7 @@
 import { useState, useEffect, useRef } from 'react';
 import {
  LayoutDashboard, Server, FlaskConical, Brain,
-  TrendingUp, Users, Database, Swords, DollarSign, Settings, Trophy, Medal,
+  TrendingUp, Activity, Users, Database, Swords, DollarSign, Settings, Trophy, Medal,
  PanelLeftClose, PanelLeftOpen,
 } from 'lucide-react';
 import { useGameStore, type ActivePage } from '@/store';
@@ -12,6 +12,7 @@ const NAV_ITEMS: { page: ActivePage; label: string; icon: typeof LayoutDashboard
  { page: 'research', label: 'Research', icon: FlaskConical },
  { page: 'models', label: 'Models', icon: Brain },
  { page: 'market', label: 'Market', icon: TrendingUp },
+  { page: 'serving', label: 'Serving', icon: Activity },
  { page: 'finance', label: 'Finance', icon: DollarSign },
  { page: 'talent', label: 'Talent', icon: Users, era: 'scaleup' },
  { page: 'data', label: 'Data', icon: Database, era: 'scaleup' },
@@ -1,9 +1,4 @@
-import { useState, useEffect, useRef, useCallback } from 'react';
-import { useGameStore } from '@/store';
-import {
-  formatNumber, formatMoney, formatPercent,
-} from '@ai-tycoon/shared';
-import { Users, Zap, Shield, Settings2, Check } from 'lucide-react';
+import { useState } from 'react';
 import { TutorialHint } from '@/components/game/TutorialHint';
 import { MarketOverviewPanel } from './market/MarketOverviewPanel';
 import { ConsumerTiersPanel } from './market/ConsumerTiersPanel';
@@ -12,7 +7,7 @@ import { EnterprisePipelinePanel } from './market/EnterprisePipelinePanel';
 import { DeveloperEcosystemPanel } from './market/DeveloperEcosystemPanel';
 import { ProductLinesPanel } from './market/ProductLinesPanel';

-type MarketTab = 'overview' | 'consumer' | 'api' | 'enterprise' | 'ecosystem' | 'products' | 'settings';
+type MarketTab = 'overview' | 'consumer' | 'api' | 'enterprise' | 'ecosystem' | 'products';

 const TABS: { id: MarketTab; label: string }[] = [
  { id: 'overview', label: 'Overview' },
@@ -21,133 +16,8 @@ const TABS: { id: MarketTab; label: string }[] = [
  { id: 'enterprise', label: 'Enterprise' },
  { id: 'ecosystem', label: 'Dev Ecosystem' },
  { id: 'products', label: 'Products' },
-  { id: 'settings', label: 'Settings' },
 ];

-function useAppliedFeedback() {
-  const [state, setState] = useState<'hidden' | 'valid' | 'invalid'>('hidden');
-  const timerRef = useRef<ReturnType<typeof setTimeout>>(undefined);
-  const trigger = useCallback((valid = true) => {
-    setState(valid ? 'valid' : 'invalid');
-    clearTimeout(timerRef.current);
-    timerRef.current = setTimeout(() => setState('hidden'), 1200);
-  }, []);
-  useEffect(() => () => clearTimeout(timerRef.current), []);
-  return { show: state !== 'hidden', valid: state === 'valid', trigger };
-}
-
-function AppliedBadge({ visible, valid = true }: { visible: boolean; valid?: boolean }) {
-  if (!visible) return null;
-  if (!valid) {
-    return (
-      <span className="inline-flex items-center gap-1 text-[10px] text-danger ml-2 animate-pulse">
-        Invalid
-      </span>
-    );
-  }
-  return (
-    <span className="inline-flex items-center gap-1 text-[10px] text-success ml-2 animate-pulse">
-      <Check size={10} /> Applied
-    </span>
-  );
-}
-
-function SettingsPanel() {
-  const overloadPolicy = useGameStore((s) => s.market.overloadPolicy);
-  const inferenceUtil = useGameStore((s) => s.compute.inferenceUtilization);
-  const tokensCapacity = useGameStore((s) => s.compute.tokensPerSecondCapacity);
-  const tokensDemand = useGameStore((s) => s.compute.tokensPerSecondDemand);
-  const setOverloadPolicy = useGameStore((s) => s.setOverloadPolicy);
-  const policyFeedback = useAppliedFeedback();
-
-  return (
-    <div className="space-y-4">
-      <div className="grid grid-cols-3 gap-4">
-        <div className="bg-surface-900 border border-surface-700 rounded-xl p-4">
-          <div className="flex items-center gap-2 mb-2">
-            <Zap size={16} className="text-blue-400" />
-            <span className="text-xs text-surface-400 uppercase">Inference Load</span>
-          </div>
-          <div className="text-2xl font-bold font-mono">{formatPercent(inferenceUtil)}</div>
-          <div className="text-xs text-surface-400 mt-1">
-            {formatNumber(tokensDemand)} / {formatNumber(tokensCapacity)} tok/s
-          </div>
-        </div>
-        <div className="bg-surface-900 border border-surface-700 rounded-xl p-4">
-          <div className="flex items-center gap-2 mb-2">
-            <Users size={16} className="text-orange-400" />
-            <span className="text-xs text-surface-400 uppercase">Subscribers</span>
-          </div>
-          <div className="text-2xl font-bold font-mono">{formatNumber(useGameStore.getState().market.consumerTiers.totalUsers)}</div>
-        </div>
-        <div className="bg-surface-900 border border-surface-700 rounded-xl p-4">
-          <div className="flex items-center gap-2 mb-2">
-            <Shield size={16} className="text-green-400" />
-            <span className="text-xs text-surface-400 uppercase">Satisfaction</span>
-          </div>
-          <div className="text-2xl font-bold font-mono">{formatPercent(useGameStore.getState().market.consumerTiers.satisfaction)}</div>
-        </div>
-      </div>
-
-      <div className="bg-surface-900 border border-surface-700 rounded-xl p-4 space-y-3">
-        <h3 className="font-semibold flex items-center gap-2">
-          <Settings2 size={16} />
-          Overload Policy
-          <AppliedBadge visible={policyFeedback.show} valid={policyFeedback.valid} />
-        </h3>
-        <div className="grid grid-cols-2 gap-4">
-          <div>
-            <label className="block text-xs text-surface-400 mb-1">Max Queue Depth</label>
-            <input
-              type="number"
-              value={overloadPolicy.maxQueueDepth}
-              onChange={(e) => { const v = Number(e.target.value); if (v >= 10) { setOverloadPolicy({ maxQueueDepth: v }); policyFeedback.trigger(true); } else { policyFeedback.trigger(false); } }}
-              className="w-full bg-surface-800 border border-surface-600 rounded px-3 py-1.5 text-sm font-mono focus:outline-none focus:ring-2 focus:ring-accent/50"
-              min={10}
-              step={10}
-            />
-            <p className="text-[10px] text-surface-500 mt-0.5">Higher = more latency tolerance, lower satisfaction</p>
-          </div>
-          <div>
-            <label className="block text-xs text-surface-400 mb-1">Rate Limit / Customer (tok/s)</label>
-            <input
-              type="number"
-              value={overloadPolicy.rateLimitPerCustomer}
-              onChange={(e) => { const v = Number(e.target.value); if (v >= 100) { setOverloadPolicy({ rateLimitPerCustomer: v }); policyFeedback.trigger(true); } else { policyFeedback.trigger(false); } }}
-              className="w-full bg-surface-800 border border-surface-600 rounded px-3 py-1.5 text-sm font-mono focus:outline-none focus:ring-2 focus:ring-accent/50"
-              min={100}
-              step={100}
-            />
-            <p className="text-[10px] text-surface-500 mt-0.5">Lower = less compute per user, serves more customers</p>
-          </div>
-        </div>
-        <div className="flex items-center gap-6">
-          <label className="flex items-center gap-2 text-sm cursor-pointer">
-            <input
-              type="checkbox"
-              checked={overloadPolicy.degradeQualityUnderLoad}
-              onChange={(e) => { setOverloadPolicy({ degradeQualityUnderLoad: e.target.checked }); policyFeedback.trigger(); }}
-              className="accent-accent"
-            />
-            <span className="text-surface-300">Degrade quality under load</span>
-            <span className="text-[10px] text-surface-500">Reduces quality to maintain throughput</span>
-          </label>
-          <label className="flex items-center gap-2 text-sm cursor-pointer">
-            <input
-              type="checkbox"
-              checked={overloadPolicy.prioritizeEnterprise}
-              onChange={(e) => { setOverloadPolicy({ prioritizeEnterprise: e.target.checked }); policyFeedback.trigger(); }}
-              className="accent-accent"
-            />
-            <span className="text-surface-300">Prioritize enterprise</span>
-            <span className="text-[10px] text-surface-500">Enterprise SLAs before consumer traffic</span>
-          </label>
-        </div>
-      </div>
-    </div>
-  );
-}
-
 const VALID_TABS = new Set(TABS.map(t => t.id));

 export function MarketPage({ initialTab, onTabChange }: { initialTab?: string | null; onTabChange?: (tab: string | null) => void }) {
@@ -189,7 +59,6 @@ export function MarketPage({ initialTab, onTabChange }: { initialTab?: string |
      {activeTab === 'enterprise' && <EnterprisePipelinePanel />}
      {activeTab === 'ecosystem' && <DeveloperEcosystemPanel />}
      {activeTab === 'products' && <ProductLinesPanel />}
-      {activeTab === 'settings' && <SettingsPanel />}
    </div>
  );
 }
@@ -0,0 +1,484 @@
+import { useGameStore } from '@/store';
+import {
+  formatNumber, formatPercent,
+  type TrafficPriority, type OverflowBehavior, type RoutingStrategy,
+  TRAFFIC_PRIORITIES,
+} from '@ai-tycoon/shared';
+import {
+  Activity, Shield, Clock, CheckCircle, XCircle, Layers,
+  AlertTriangle, Zap, Server, ArrowRight,
+} from 'lucide-react';
+
+// Text-color utility class for each traffic tier; applied to the tier label
+// cells in the pipeline table and the per-tier policy rows below.
+const TIER_COLORS: Record<TrafficPriority, string> = {
+  'enterprise': 'text-purple-400',
+  'api-paid': 'text-blue-400',
+  'consumer-paid': 'text-green-400',
+  'api-free': 'text-yellow-400',
+  'consumer-free': 'text-surface-400',
+};
+
+// Translucent background utility class for each traffic tier.
+// NOTE(review): no reference to TIER_BG appears in this file — confirm it is
+// still needed (it would trip `noUnusedLocals` if that option is enabled).
+const TIER_BG: Record<TrafficPriority, string> = {
+  'enterprise': 'bg-purple-500/20',
+  'api-paid': 'bg-blue-500/20',
+  'consumer-paid': 'bg-green-500/20',
+  'api-free': 'bg-yellow-500/20',
+  'consumer-free': 'bg-surface-500/20',
+};
+
+// Human-readable display name for each traffic tier.
+const TIER_LABELS: Record<TrafficPriority, string> = {
+  'enterprise': 'Enterprise',
+  'api-paid': 'API Paid',
+  'consumer-paid': 'Consumer Paid',
+  'api-free': 'API Free',
+  'consumer-free': 'Consumer Free',
+};
+
+// Choices offered in each tier's overflow-behavior <select>, in display order.
+const OVERFLOW_OPTIONS: { value: OverflowBehavior; label: string }[] = [
+  { value: 'queue', label: 'Queue' },
+  { value: 'reject', label: 'Reject' },
+  { value: 'degrade', label: 'Degrade' },
+];
+
+// Routing strategies rendered as selectable buttons; `desc` is the helper text
+// shown under each button label.
+const ROUTING_OPTIONS: { value: RoutingStrategy; label: string; desc: string }[] = [
+  { value: 'quality-first', label: 'Quality First', desc: 'Best model first — maximizes quality' },
+  { value: 'balanced', label: 'Balanced', desc: 'Adapts to load — quality when idle, speed when busy' },
+  { value: 'speed-first', label: 'Speed First', desc: 'Fastest model first — maximizes throughput' },
+];
+
+/**
+ * Summary tile: an icon and uppercase label on top, a large monospace value
+ * below, and an optional secondary line underneath.
+ *
+ * @param props.icon  Icon component rendered at 16px.
+ * @param props.label Caption shown next to the icon.
+ * @param props.value Pre-formatted headline value.
+ * @param props.sub   Optional secondary text; omitted from the DOM when falsy.
+ * @param props.color Class name applied to the icon.
+ */
+function MetricCard(props: {
+  icon: typeof Activity; label: string; value: string; sub?: string; color: string;
+}) {
+  const { icon: IconComponent, label, value, sub, color } = props;
+
+  const header = (
+    <div className="flex items-center gap-2 mb-2">
+      <IconComponent size={16} className={color} />
+      <span className="text-xs text-surface-400 uppercase">{label}</span>
+    </div>
+  );
+
+  return (
+    <div className="bg-surface-900 border border-surface-700 rounded-xl p-4">
+      {header}
+      <div className="text-2xl font-bold font-mono">{value}</div>
+      {sub && <div className="text-xs text-surface-400 mt-1">{sub}</div>}
+    </div>
+  );
+}
+
+/**
+ * Table of per-tier serving results read from `market.servingMetrics.tierMetrics`.
+ *
+ * One row is rendered for every entry of `TRAFFIC_PRIORITIES`. A tier with no
+ * metrics entry, or with zero demand, gets a row of dash placeholders. For
+ * tiers with demand, zero queued/rejected/degraded counts are shown as a dash.
+ */
+function PipelineFlow() {
+  const { tierMetrics } = useGameStore(s => s.market.servingMetrics);
+
+  // Right-aligned numeric columns, in display order (the "Tier" column precedes them).
+  const numericColumns = ['Demand', 'Served', 'Queued', 'Rejected', 'Degraded', 'Quality'];
+  const cell = 'text-right py-2 px-2';
+  // Zero counts are displayed as a dash rather than a number.
+  const countOrDash = (tokens: number) => (tokens > 0 ? formatNumber(tokens) : '—');
+
+  return (
+    <div className="bg-surface-900 border border-surface-700 rounded-xl p-4">
+      <h3 className="font-semibold flex items-center gap-2 mb-4">
+        <ArrowRight size={16} />
+        Request Pipeline
+      </h3>
+      <div className="overflow-x-auto">
+        <table className="w-full text-sm">
+          <thead>
+            <tr className="text-xs text-surface-400 uppercase">
+              <th className="text-left py-2 px-2">Tier</th>
+              {numericColumns.map(title => (
+                <th key={title} className={cell}>{title}</th>
+              ))}
+            </tr>
+          </thead>
+          <tbody>
+            {TRAFFIC_PRIORITIES.map(tier => {
+              const metrics = tierMetrics[tier];
+              const tierCell = (
+                <td className={`py-2 px-2 font-medium ${TIER_COLORS[tier]}`}>{TIER_LABELS[tier]}</td>
+              );
+
+              // No entry or no demand: placeholder row.
+              if (!metrics || metrics.demandTokens === 0) {
+                return (
+                  <tr key={tier} className="border-t border-surface-800">
+                    {tierCell}
+                    {numericColumns.map(title => (
+                      <td key={title} className={`${cell} text-surface-500`}>—</td>
+                    ))}
+                  </tr>
+                );
+              }
+
+              return (
+                <tr key={tier} className="border-t border-surface-800">
+                  {tierCell}
+                  <td className={`${cell} font-mono`}>{formatNumber(metrics.demandTokens)}</td>
+                  <td className={`${cell} font-mono text-green-400`}>{formatNumber(metrics.servedTokens)}</td>
+                  <td className={`${cell} font-mono text-yellow-400`}>{countOrDash(metrics.queuedTokens)}</td>
+                  <td className={`${cell} font-mono text-red-400`}>{countOrDash(metrics.rejectedTokens)}</td>
+                  <td className={`${cell} font-mono text-orange-400`}>{countOrDash(metrics.degradedTokens)}</td>
+                  <td className={`${cell} font-mono`}>{formatPercent(metrics.avgQualityDelivered)}</td>
+                </tr>
+              );
+            })}
+          </tbody>
+        </table>
+      </div>
+    </div>
+  );
+}
+
+/**
+ * Lists every entry of `market.servingMetrics.modelUtilization` with a
+ * utilization bar, quality score and throughput capacity. Shows an empty-state
+ * message when the list is empty.
+ */
+function ModelFleetPanel() {
+  const fleet = useGameStore(s => s.market.servingMetrics.modelUtilization);
+
+  // Bar turns yellow above 70% utilization and red above 90%.
+  const barColorFor = (load: number) => {
+    if (load > 0.9) return 'bg-red-500';
+    if (load > 0.7) return 'bg-yellow-500';
+    return 'bg-green-500';
+  };
+
+  return (
+    <div className="bg-surface-900 border border-surface-700 rounded-xl p-4">
+      <h3 className="font-semibold flex items-center gap-2 mb-3">
+        <Server size={16} />
+        Model Fleet
+      </h3>
+      {fleet.length === 0 ? (
+        <p className="text-sm text-surface-500">No models deployed. Train and deploy models to start serving requests.</p>
+      ) : (
+        <div className="space-y-2">
+          {fleet.map(model => {
+            // Clamp the bar width so utilization above 1 cannot overflow the track.
+            const barWidth = `${Math.min(100, model.utilization * 100)}%`;
+            return (
+              <div key={model.modelId} className="flex items-center gap-3">
+                <div className="w-40 truncate text-sm">
+                  <span className="font-medium">{model.modelName}</span>
+                  {model.quantization && <span className="text-xs text-surface-400 ml-1">({model.quantization.toUpperCase()})</span>}
+                </div>
+                <div className="flex-1">
+                  <div className="h-3 bg-surface-800 rounded-full overflow-hidden">
+                    <div
+                      className={`h-full rounded-full transition-all ${barColorFor(model.utilization)}`}
+                      style={{ width: barWidth }}
+                    />
+                  </div>
+                </div>
+                <div className="w-12 text-right text-xs font-mono">{formatPercent(model.utilization)}</div>
+                <div className="w-16 text-right text-xs text-surface-400">Q:{(model.qualityScore * 100).toFixed(0)}</div>
+                <div className="w-20 text-right text-xs text-surface-400">{formatNumber(model.throughputCapacity)} t/s</div>
+              </div>
+            );
+          })}
+        </div>
+      )}
+    </div>
+  );
+}
+
+/**
+ * Serving-policy editor. Reads `market.overloadPolicy` from the game store and
+ * writes partial updates back through `setOverloadPolicy`.
+ *
+ * Enterprise reservation and the auto-degradation toggle are always shown; the
+ * remaining controls appear only once the matching research id is present in
+ * `research.completedResearch`:
+ * - `request-routing`: routing strategy and per-tier rate limits
+ * - `priority-queues`: per-tier overflow behavior and max queue depth
+ * - `request-batching`: Batch API toggle and discount
+ * - `auto-scaling`: auto-degradation threshold and quality floor
+ *
+ * Nested policy objects are spread into each update so the patch handed to
+ * `setOverloadPolicy` also carries the untouched sibling fields.
+ *
+ * NOTE(review): the "Request Batching" and "Priority Queue System" unlock hints
+ * are only rendered once `request-routing` is complete — confirm that hiding
+ * the batching hint before routing is researched is intended.
+ */
+function PolicyControls() {
+  const policy = useGameStore(s => s.market.overloadPolicy);
+  const setPolicy = useGameStore(s => s.setOverloadPolicy);
+  // Apply the `[]` fallback outside the selector: a selector that returns a
+  // fresh array on every call never compares equal to its previous result,
+  // which forces needless re-renders (and can loop under useSyncExternalStore).
+  const completedResearch = useGameStore(s => s.research?.completedResearch) ?? [];
+
+  const hasRouting = completedResearch.includes('request-routing');
+  const hasPriorityQueues = completedResearch.includes('priority-queues');
+  const hasBatching = completedResearch.includes('request-batching');
+  const hasAutoScaling = completedResearch.includes('auto-scaling');
+
+  return (
+    <div className="bg-surface-900 border border-surface-700 rounded-xl p-4 space-y-4">
+      <h3 className="font-semibold flex items-center gap-2">
+        <Layers size={16} />
+        Policy Controls
+      </h3>
+
+      {/* Always available: Enterprise Reservation */}
+      <div>
+        <label className="block text-xs text-surface-400 mb-1">Enterprise Capacity Reservation</label>
+        <div className="flex items-center gap-3">
+          <input
+            type="range"
+            min={0} max={50} step={5}
+            value={policy.enterpriseReservation * 100}
+            onChange={e => setPolicy({ enterpriseReservation: Number(e.target.value) / 100 })}
+            className="flex-1 accent-accent"
+          />
+          <span className="w-12 text-right font-mono text-sm">{(policy.enterpriseReservation * 100).toFixed(0)}%</span>
+        </div>
+        <p className="text-[10px] text-surface-500 mt-0.5">Reserve capacity for enterprise SLAs — protects contracts but limits other tiers</p>
+      </div>
+
+      {/* Always available: Auto-Degradation toggle */}
+      <div>
+        <label className="flex items-center gap-2 text-sm cursor-pointer">
+          <input
+            type="checkbox"
+            checked={policy.autoDegradation.enabled}
+            onChange={e => setPolicy({
+              autoDegradation: { ...policy.autoDegradation, enabled: e.target.checked },
+            })}
+            className="accent-accent"
+          />
+          <span className="text-surface-300">Auto-Degradation</span>
+          <span className="text-[10px] text-surface-500">Fall back to faster models under load</span>
+        </label>
+        {hasAutoScaling && policy.autoDegradation.enabled && (
+          <div className="mt-2 ml-6 space-y-2">
+            <div>
+              <label className="block text-xs text-surface-400 mb-1">Trigger Threshold</label>
+              <div className="flex items-center gap-3">
+                <input
+                  type="range"
+                  min={70} max={95} step={5}
+                  value={policy.autoDegradation.triggerThreshold * 100}
+                  onChange={e => setPolicy({
+                    autoDegradation: { ...policy.autoDegradation, triggerThreshold: Number(e.target.value) / 100 },
+                  })}
+                  className="flex-1 accent-accent"
+                />
+                <span className="w-12 text-right font-mono text-sm">{(policy.autoDegradation.triggerThreshold * 100).toFixed(0)}%</span>
+              </div>
+            </div>
+            <div>
+              <label className="block text-xs text-surface-400 mb-1">Minimum Quality Floor</label>
+              <div className="flex items-center gap-3">
+                <input
+                  type="range"
+                  min={50} max={100} step={5}
+                  value={policy.autoDegradation.minQualityFloor * 100}
+                  onChange={e => setPolicy({
+                    autoDegradation: { ...policy.autoDegradation, minQualityFloor: Number(e.target.value) / 100 },
+                  })}
+                  className="flex-1 accent-accent"
+                />
+                <span className="w-12 text-right font-mono text-sm">{(policy.autoDegradation.minQualityFloor * 100).toFixed(0)}%</span>
+              </div>
+            </div>
+          </div>
+        )}
+      </div>
+
+      {/* Routing Strategy — requires research */}
+      {hasRouting ? (
+        <div>
+          <label className="block text-xs text-surface-400 mb-2">Routing Strategy</label>
+          <div className="grid grid-cols-3 gap-2">
+            {ROUTING_OPTIONS.map(opt => (
+              <button
+                key={opt.value}
+                onClick={() => setPolicy({ routingStrategy: opt.value })}
+                className={`p-2 rounded-lg border text-sm text-left transition-colors ${
+                  policy.routingStrategy === opt.value
+                    ? 'border-accent bg-accent/10 text-accent-light'
+                    : 'border-surface-700 text-surface-300 hover:border-surface-600'
+                }`}
+              >
+                <div className="font-medium">{opt.label}</div>
+                <div className="text-[10px] text-surface-500 mt-0.5">{opt.desc}</div>
+              </button>
+            ))}
+          </div>
+        </div>
+      ) : (
+        <div className="flex items-center gap-2 text-xs text-surface-500 bg-surface-800 rounded-lg p-3">
+          <AlertTriangle size={14} />
+          Research "Intelligent Request Routing" to unlock routing strategies and per-tier rate limits
+        </div>
+      )}
+
+      {/* Priority & Overflow — requires research */}
+      {hasPriorityQueues ? (
+        <div>
+          <label className="block text-xs text-surface-400 mb-2">Per-Tier Overflow Behavior</label>
+          <div className="space-y-1.5">
+            {TRAFFIC_PRIORITIES.map(tier => (
+              <div key={tier} className="flex items-center gap-3">
+                <span className={`w-32 text-sm ${TIER_COLORS[tier]}`}>{TIER_LABELS[tier]}</span>
+                <select
+                  value={policy.overflowBehavior[tier]}
+                  onChange={e => setPolicy({
+                    overflowBehavior: {
+                      ...policy.overflowBehavior,
+                      [tier]: e.target.value as OverflowBehavior,
+                    },
+                  })}
+                  className="bg-surface-800 border border-surface-600 rounded px-2 py-1 text-sm"
+                >
+                  {OVERFLOW_OPTIONS.map(opt => (
+                    <option key={opt.value} value={opt.value}>{opt.label}</option>
+                  ))}
+                </select>
+              </div>
+            ))}
+          </div>
+          <div className="mt-3">
+            <label className="block text-xs text-surface-400 mb-1">Max Queue Depth</label>
+            <div className="flex items-center gap-3">
+              <input
+                type="range"
+                min={10} max={500} step={10}
+                value={policy.maxQueueDepth}
+                onChange={e => setPolicy({ maxQueueDepth: Number(e.target.value) })}
+                className="flex-1 accent-accent"
+              />
+              <span className="w-16 text-right font-mono text-sm">{policy.maxQueueDepth}</span>
+            </div>
+          </div>
+        </div>
+      ) : !hasRouting ? null : (
+        <div className="flex items-center gap-2 text-xs text-surface-500 bg-surface-800 rounded-lg p-3">
+          <AlertTriangle size={14} />
+          Research "Priority Queue System" to unlock per-tier overflow behavior and queue controls
+        </div>
+      )}
+
+      {/* Batch API — requires research */}
+      {hasBatching ? (
+        <div>
+          <label className="flex items-center gap-2 text-sm cursor-pointer">
+            <input
+              type="checkbox"
+              checked={policy.batchApiEnabled}
+              onChange={e => setPolicy({ batchApiEnabled: e.target.checked })}
+              className="accent-accent"
+            />
+            <span className="text-surface-300">Batch API</span>
+            <span className="text-[10px] text-surface-500">Fill idle capacity with discounted batch requests</span>
+          </label>
+          {policy.batchApiEnabled && (
+            <div className="mt-2 ml-6">
+              <label className="block text-xs text-surface-400 mb-1">Batch Discount</label>
+              <div className="flex items-center gap-3">
+                <input
+                  type="range"
+                  min={30} max={70} step={5}
+                  value={policy.batchApiDiscount * 100}
+                  onChange={e => setPolicy({ batchApiDiscount: Number(e.target.value) / 100 })}
+                  className="flex-1 accent-accent"
+                />
+                <span className="w-12 text-right font-mono text-sm">{(policy.batchApiDiscount * 100).toFixed(0)}%</span>
+              </div>
+              <p className="text-[10px] text-surface-500 mt-0.5">Higher discount = more batch demand, lower per-token revenue</p>
+            </div>
+          )}
+        </div>
+      ) : hasRouting ? (
+        <div className="flex items-center gap-2 text-xs text-surface-500 bg-surface-800 rounded-lg p-3">
+          <AlertTriangle size={14} />
+          Research "Request Batching" to unlock the Batch API product line
+        </div>
+      ) : null}
+
+      {/* Rate limits — requires routing research */}
+      {hasRouting && (
+        <div>
+          <label className="block text-xs text-surface-400 mb-2">Per-Tier Rate Limits (tok/s per customer)</label>
+          <div className="space-y-1.5">
+            {TRAFFIC_PRIORITIES.map(tier => (
+              <div key={tier} className="flex items-center gap-3">
+                <span className={`w-32 text-sm ${TIER_COLORS[tier]}`}>{TIER_LABELS[tier]}</span>
+                <input
+                  type="number"
+                  value={policy.rateLimitPerCustomer[tier]}
+                  onChange={e => {
+                    const v = Number(e.target.value);
+                    // Values below the minimum (including an emptied field, which parses to 0) are ignored.
+                    if (v >= 10) {
+                      setPolicy({
+                        rateLimitPerCustomer: {
+                          ...policy.rateLimitPerCustomer,
+                          [tier]: v,
+                        },
+                      });
+                    }
+                  }}
+                  className="w-28 bg-surface-800 border border-surface-600 rounded px-2 py-1 text-sm font-mono"
+                  min={10}
+                  step={100}
+                />
+              </div>
+            ))}
+          </div>
+        </div>
+      )}
+    </div>
+  );
+}
+
+/**
+ * Batch API summary: pending queue, tokens served last tick, and batch revenue
+ * per tick. Renders nothing unless `request-batching` is in
+ * `research.completedResearch` and `overloadPolicy.batchApiEnabled` is set.
+ */
+function BatchApiPanel() {
+  const batch = useGameStore(s => s.market.batchApi);
+  // Select only the fields that are read, so unrelated serving-metric or
+  // policy changes do not re-render this panel.
+  const batchApiRevenue = useGameStore(s => s.market.servingMetrics.batchApiRevenue);
+  const batchApiEnabled = useGameStore(s => s.market.overloadPolicy.batchApiEnabled);
+  // Apply the `[]` fallback outside the selector: a selector that returns a
+  // fresh array on every call never compares equal to its previous result,
+  // which forces needless re-renders (and can loop under useSyncExternalStore).
+  const completedResearch = useGameStore(s => s.research?.completedResearch) ?? [];
+
+  // All hooks run above this line; the early return must stay below them.
+  if (!completedResearch.includes('request-batching') || !batchApiEnabled) return null;
+
+  return (
+    <div className="bg-surface-900 border border-surface-700 rounded-xl p-4">
+      <h3 className="font-semibold flex items-center gap-2 mb-3">
+        <Zap size={16} className="text-blue-400" />
+        Batch API
+      </h3>
+      <div className="grid grid-cols-3 gap-4 text-sm">
+        <div>
+          <div className="text-xs text-surface-400">Pending Queue</div>
+          <div className="font-mono">{formatNumber(batch.pendingQueue)} tok</div>
+        </div>
+        <div>
+          <div className="text-xs text-surface-400">Served Last Tick</div>
+          <div className="font-mono text-green-400">{formatNumber(batch.servedLastTick)} tok</div>
+        </div>
+        <div>
+          <div className="text-xs text-surface-400">Revenue</div>
+          <div className="font-mono text-accent">${batchApiRevenue.toFixed(4)}/tick</div>
+        </div>
+      </div>
+    </div>
+  );
+}
+
+/**
+ * Serving page: four headline metric cards, the per-tier pipeline table, the
+ * Batch API panel, and a two-column row with policy controls and the model fleet.
+ *
+ * Success rate is served / (served + queued + rejected), and is reported as
+ * 100% when that sum is zero.
+ */
+export function ServingPage() {
+  const metrics = useGameStore(s => s.market.servingMetrics);
+  const compute = useGameStore(s => s.compute);
+
+  const { totalServed, totalQueued, totalRejected } = metrics;
+  const requestTotal = totalServed + totalQueued + totalRejected;
+  const successRate = requestTotal > 0 ? totalServed / requestTotal : 1;
+
+  const hasQueue = totalQueued > 0;
+  const hasRejections = totalRejected > 0;
+  const throughput = [
+    formatNumber(compute.tokensPerSecondDemand),
+    formatNumber(compute.tokensPerSecondCapacity),
+  ].join(' / ');
+
+  return (
+    <div className="space-y-4">
+      <h2 className="text-2xl font-bold">Serving Pipeline</h2>
+
+      {/* Headline metrics */}
+      <div className="grid grid-cols-4 gap-4">
+        <MetricCard
+          icon={Activity}
+          label="Throughput"
+          value={throughput}
+          sub={`${formatPercent(compute.inferenceUtilization)} utilization`}
+          color="text-blue-400"
+        />
+        <MetricCard
+          icon={Shield}
+          label="Effective Quality"
+          value={formatPercent(metrics.effectiveQuality)}
+          sub="Weighted avg quality delivered"
+          color="text-green-400"
+        />
+        <MetricCard
+          icon={Clock}
+          label="Avg Latency"
+          value={`${metrics.avgLatencyMs.toFixed(0)}ms`}
+          sub={hasQueue ? `${formatNumber(totalQueued)} queued` : 'No queuing'}
+          color="text-yellow-400"
+        />
+        <MetricCard
+          icon={hasRejections ? XCircle : CheckCircle}
+          label="Success Rate"
+          value={formatPercent(successRate)}
+          sub={hasRejections ? `${formatNumber(totalRejected)} rejected` : 'All requests served'}
+          color={hasRejections ? 'text-red-400' : 'text-green-400'}
+        />
+      </div>
+
+      {/* Per-tier pipeline table */}
+      <PipelineFlow />
+
+      {/* Batch API metrics (renders nothing while the feature is locked or off) */}
+      <BatchApiPanel />
+
+      {/* Bottom row: policy controls beside the model fleet */}
+      <div className="grid grid-cols-2 gap-4">
+        <PolicyControls />
+        <ModelFleetPanel />
+      </div>
+    </div>
+  );
+}
@@ -48,7 +48,7 @@ import {
 import { INITIAL_RIVALS } from '@ai-tycoon/game-engine';

 export type ActivePage = 'dashboard' | 'infrastructure' | 'research' | 'models'
-  | 'market' | 'talent' | 'data' | 'competitors' | 'finance' | 'achievements' | 'leaderboard' | 'settings';
+  | 'market' | 'serving' | 'talent' | 'data' | 'competitors' | 'finance' | 'achievements' | 'leaderboard' | 'settings';

 export type InfraNavLevel = 'clusters' | 'cluster' | 'campus' | 'datacenter';

@@ -433,6 +433,48 @@ export const TECH_TREE: ResearchNode[] = [
    effects: [{ type: 'unlock_product_line', target: 'agents-platform', value: 1 }],
  },

+  // === SERVING INFRASTRUCTURE ===
+  {
+    id: 'request-routing',
+    name: 'Intelligent Request Routing',
+    description: 'Route requests to optimal model size/variant. Unlocks routing strategy and per-tier rate limits.',
+    era: 'scaleup',
+    category: 'efficiency',
+    prerequisites: ['inference-optimization'],
+    cost: { researchPoints: 2, compute: 25, ticks: 150 },
+    effects: [{ type: 'unlock_feature', target: 'request-routing', value: 1 }],
+  },
+  {
+    id: 'priority-queues',
+    name: 'Priority Queue System',
+    description: 'SLA-aware scheduling with granular priority controls. Unlocks priority ordering and overflow policies.',
+    era: 'scaleup',
+    category: 'efficiency',
+    prerequisites: ['request-routing'],
+    cost: { researchPoints: 3, compute: 30, ticks: 180 },
+    effects: [{ type: 'unlock_feature', target: 'priority-queues', value: 1 }],
+  },
+  {
+    id: 'request-batching',
+    name: 'Request Batching',
+    description: 'Group inference requests for higher throughput. Unlocks Batch API product line at 50% discount.',
+    era: 'scaleup',
+    category: 'efficiency',
+    prerequisites: ['inference-optimization'],
+    cost: { researchPoints: 2, compute: 20, ticks: 120 },
+    effects: [{ type: 'unlock_feature', target: 'request-batching', value: 1 }],
+  },
+  {
+    id: 'auto-scaling',
+    name: 'Auto-Scaling Infrastructure',
+    description: 'Dynamically reallocate compute during demand spikes. +20% effective capacity headroom.',
+    era: 'bigtech',
+    category: 'efficiency',
+    prerequisites: ['request-routing'],
+    cost: { researchPoints: 4, compute: 60, ticks: 300 },
+    effects: [{ type: 'efficiency_boost', target: 'auto_scaling', value: 0.2 }],
+  },
+
  // === DATA ===
  {
    id: 'data-pipeline',
@@ -1,9 +1,10 @@
-import type { ApiTierState, ApiTierId, DeveloperEcosystem } from '@ai-tycoon/shared';
+import type { ApiTierState, ApiTierId, DeveloperEcosystem, TierServingMetrics } from '@ai-tycoon/shared';
 import {
  API_TIER_ORDER,
  API_CONVERSION_RATES,
  API_TIER_CHURN_RATES,
  API_TOKENS_PER_DEVELOPER_PER_TICK,
+  REJECTION_CHURN_MULTIPLIER,
 } from '@ai-tycoon/shared';

 export interface ApiTickResult {
@@ -18,6 +19,8 @@ export function processApiTiers(
  modelQuality: number,
  seasonalApiMultiplier: number,
  ecosystem: DeveloperEcosystem,
+  apiPaidMetrics: TierServingMetrics,
+  apiFreeMetrics: TierServingMetrics,
 ): ApiTickResult {
  const updated: ApiTierState = {
    tiers: { ...tiers.tiers },
@@ -89,6 +92,23 @@ export function processApiTiers(
  updated.totalDevelopers = totalDevelopers;
  updated.totalTokensPerTick = totalTokens;

+  const freeRejectRate = apiFreeMetrics.demandTokens > 0
+    ? apiFreeMetrics.rejectedTokens / apiFreeMetrics.demandTokens : 0;
+  if (freeRejectRate > 0) {
+    const extraChurn = updated.tiers.free.developerCount * freeRejectRate * 0.01 * REJECTION_CHURN_MULTIPLIER;
+    updated.tiers.free.developerCount = Math.max(0, updated.tiers.free.developerCount - extraChurn);
+  }
+
+  const paidRejectRate = apiPaidMetrics.demandTokens > 0
+    ? apiPaidMetrics.rejectedTokens / apiPaidMetrics.demandTokens : 0;
+  if (paidRejectRate > 0) {
+    for (const id of API_TIER_ORDER) {
+      if (id === 'free') continue;
+      const extraChurn = updated.tiers[id].developerCount * paidRejectRate * 0.005 * REJECTION_CHURN_MULTIPLIER;
+      updated.tiers[id].developerCount = Math.max(0, updated.tiers[id].developerCount - extraChurn);
+    }
+  }
+
  return {
    apiTiers: updated,
    apiRevenue: Math.max(0, apiRevenue),
@@ -1,12 +1,13 @@
-import type { ConsumerTierState, ConsumerTierId } from '@ai-tycoon/shared';
+import type { ConsumerTierState, ConsumerTierId, TierServingMetrics } from '@ai-tycoon/shared';
 import {
  CONSUMER_TIER_ORDER,
  CONVERSION_RATES,
  TIER_CHURN_RATES,
  FREE_TIER_ADOPTION_RATE,
  CONSUMER_TOKENS_PER_SUBSCRIBER,
-  OVERLOAD_PENALTY_EXPONENT,
  NETWORK_DEGRADATION,
+  REJECTION_CHURN_MULTIPLIER,
+  QUEUE_CHURN_MULTIPLIER,
 } from '@ai-tycoon/shared';

 export interface ConsumerTickResult {
@@ -20,9 +21,9 @@ export function processConsumerTiers(
  playerConsumerCustomers: number,
  modelQuality: number,
  seasonalConsumerMultiplier: number,
-  demandCapacityRatio: number,
  networkLatencyPenalty: number,
-  overloadPolicy: { degradeQualityUnderLoad: boolean; prioritizeEnterprise: boolean },
+  consumerPaidMetrics: TierServingMetrics,
+  consumerFreeMetrics: TierServingMetrics,
 ): ConsumerTickResult {
  const updated = {
    tiers: { ...tiers.tiers },
@@ -97,26 +98,64 @@ export function processConsumerTiers(

  updated.totalUsers = totalUsers;

+  const paidDemand = consumerPaidMetrics.demandTokens;
+  const freeDemand = consumerFreeMetrics.demandTokens;
+  const totalDemand = paidDemand + freeDemand;
+
+  let servingPenalty = 0;
+  if (totalDemand > 0) {
+    const totalRejected = consumerPaidMetrics.rejectedTokens + consumerFreeMetrics.rejectedTokens;
+    const totalQueued = consumerPaidMetrics.queuedTokens + consumerFreeMetrics.queuedTokens;
+    const rejectedFraction = totalRejected / totalDemand;
+    const queuedFraction = totalQueued / totalDemand;
+
+    servingPenalty = rejectedFraction * 1.5 + queuedFraction * 0.5;
+
+    const avgQuality = totalDemand > 0
+      ? (consumerPaidMetrics.avgQualityDelivered * paidDemand + consumerFreeMetrics.avgQualityDelivered * freeDemand) / totalDemand
+      : modelQuality;
+    const qualityGap = Math.max(0, modelQuality - avgQuality);
+    servingPenalty += qualityGap * 0.8;
+
+    if (consumerFreeMetrics.rejectedTokens > 0 && freeDemand > 0) {
+      const freeRejectRate = consumerFreeMetrics.rejectedTokens / freeDemand;
+      const extraChurn = updated.tiers.free.userCount * freeRejectRate * 0.01 * REJECTION_CHURN_MULTIPLIER;
+      updated.tiers.free.userCount = Math.max(0, updated.tiers.free.userCount - extraChurn);
+    }
+
+    if (consumerPaidMetrics.rejectedTokens > 0 && paidDemand > 0) {
+      const paidRejectRate = consumerPaidMetrics.rejectedTokens / paidDemand;
+      for (const id of CONSUMER_TIER_ORDER) {
+        if (id === 'free') continue;
+        const extraChurn = updated.tiers[id].userCount * paidRejectRate * 0.005 * REJECTION_CHURN_MULTIPLIER;
+        updated.tiers[id].userCount = Math.max(0, updated.tiers[id].userCount - extraChurn);
+      }
+    }
+
+    if (totalQueued > 0) {
+      for (const id of CONSUMER_TIER_ORDER) {
+        const extraChurn = updated.tiers[id].userCount * queuedFraction * 0.002 * QUEUE_CHURN_MULTIPLIER;
+        updated.tiers[id].userCount = Math.max(0, updated.tiers[id].userCount - extraChurn);
+      }
+    }
+  }
+
  let headroomBonus = 0;
-  let overloadPenalty = 0;
-  if (demandCapacityRatio <= 1) {
-    headroomBonus = (1 - demandCapacityRatio) * 0.2;
+  if (totalDemand > 0) {
+    const totalServed = consumerPaidMetrics.servedTokens + consumerFreeMetrics.servedTokens;
+    const servedFraction = totalServed / totalDemand;
+    if (servedFraction > 0.95) {
+      headroomBonus = (servedFraction - 0.95) * 4;
+    }
  } else {
-    overloadPenalty = Math.min(1, Math.pow(demandCapacityRatio - 1, OVERLOAD_PENALTY_EXPONENT));
+    headroomBonus = 0.1;
  }

  const netLatencyPenalty = networkLatencyPenalty * NETWORK_DEGRADATION.satisfactionPenaltyPerLatency;
  updated.satisfaction = Math.min(1, Math.max(0,
-    0.3 + modelQuality * 0.5 + headroomBonus - overloadPenalty - netLatencyPenalty,
+    0.3 + modelQuality * 0.5 + headroomBonus - servingPenalty - netLatencyPenalty,
  ));

-  if (overloadPolicy.degradeQualityUnderLoad && demandCapacityRatio > 0.85) {
-    updated.satisfaction = Math.max(0, updated.satisfaction - 0.02);
-  }
-  if (overloadPolicy.prioritizeEnterprise && demandCapacityRatio > 0.9) {
-    updated.satisfaction = Math.max(0, updated.satisfaction - 0.01);
-  }
-
  updated.viralCoefficient = modelQuality > 0.5 ? 1 + (modelQuality - 0.5) * 2 : 0;

  return {
@@ -5,6 +5,7 @@ import type {
  EnterpriseSegment,
  EnterprisePipelineStage,
  DeveloperEcosystem,
+  TierServingMetrics,
 } from '@ai-tycoon/shared';
 import {
  BASE_LEAD_RATE,
@@ -17,6 +18,7 @@ import {
  ENTERPRISE_SLA_REQUIREMENTS,
  ENTERPRISE_CAPABILITY_REQUIREMENTS,
  ENTERPRISE_TOKENS_PER_TICK,
+  ENTERPRISE_REJECTION_SLA_MULTIPLIER,
 } from '@ai-tycoon/shared';
 import { ENTERPRISE_NAMES } from '../../data/enterpriseNames';

@@ -62,7 +64,7 @@ export function processEnterprisePipeline(
  devEcosystem: DeveloperEcosystem,
  seasonalEntMultiplier: number,
  currentTick: number,
-  demandCapacityRatio: number,
+  enterpriseServingMetrics: TierServingMetrics,
 ): EnterprisePipelineResult {
  const pipeline = [...ent.pipeline];
  const activeContracts = [...ent.activeContracts];
@@ -129,7 +131,10 @@ export function processEnterprisePipeline(
    if (lead.stage === 'qualification') {
      transitionProb *= modelCapability >= lead.requiredCapability ? 1 : 0.1;
    } else if (lead.stage === 'poc') {
-      transitionProb *= Math.max(0.2, 1 - Math.max(0, demandCapacityRatio - 0.9) * 5);
+      const entDemand = enterpriseServingMetrics.demandTokens;
+      const entRejected = enterpriseServingMetrics.rejectedTokens;
+      const rejectRate = entDemand > 0 ? entRejected / entDemand : 0;
+      transitionProb *= Math.max(0.2, 1 - rejectRate * 5);
    } else if (lead.stage === 'negotiation') {
      transitionProb *= Math.max(0.3, 1 - (lead.dealValue / 10_000_000) * 0.5);
    }
@@ -181,14 +186,22 @@ export function processEnterprisePipeline(
    const updated = { ...contract };
    updated.totalTicks++;

-    if (demandCapacityRatio <= (1 / updated.slaUptime)) {
+    const entDemand = enterpriseServingMetrics.demandTokens;
+    const entServed = enterpriseServingMetrics.servedTokens;
+    const entRejected = enterpriseServingMetrics.rejectedTokens;
+    const servedFraction = entDemand > 0 ? entServed / entDemand : 1;
+    const wasRejected = entRejected > 0;
+    const qualityMet = enterpriseServingMetrics.avgQualityDelivered >= 0.85;
+
+    if (servedFraction >= updated.slaUptime && qualityMet && !wasRejected) {
      updated.uptimeTicks++;
    } else {
      updated.slaViolations++;
-      const penalty = updated.pricePerMToken * (updated.tokensPerTick / 1_000_000) * SLA_PENALTY_FRACTION;
+      const severityMultiplier = wasRejected ? ENTERPRISE_REJECTION_SLA_MULTIPLIER : 1.0;
+      const penalty = updated.pricePerMToken * (updated.tokensPerTick / 1_000_000) * SLA_PENALTY_FRACTION * severityMultiplier;
      slaPenalties += penalty;
      updated.slaPenaltiesPaid += penalty;
-      updated.satisfaction = Math.max(0, updated.satisfaction - 0.005);
+      updated.satisfaction = Math.max(0, updated.satisfaction - (wasRejected ? 0.01 : 0.005));
    }

    if (updated.totalTicks > 0 && updated.slaViolations === 0) {
@@ -1,5 +1,6 @@
-import type { GameState, MarketState, BenchmarkResult, Competitor } from '@ai-tycoon/shared';
-import { CONSUMER_TOKENS_PER_SUBSCRIBER } from '@ai-tycoon/shared';
+import type { GameState, MarketState, BenchmarkResult } from '@ai-tycoon/shared';
+import { CONSUMER_TOKENS_PER_SUBSCRIBER, API_TOKENS_PER_DEVELOPER_PER_TICK, BATCH_API_DEMAND_PER_DEV, makeInitialServingMetrics } from '@ai-tycoon/shared';
+import type { TrafficPriority, TierServingMetrics } from '@ai-tycoon/shared';
 import { BENCHMARKS } from '../../data/benchmarks';
 import { computeSeasonal } from './seasonalSystem';
 import { updateObsolescence } from './obsolescenceSystem';
@@ -9,6 +10,9 @@ import { processApiTiers } from './apiTierSystem';
 import { processProductLines } from './productLines';
 import { processDeveloperEcosystem } from './developerEcosystem';
 import { processEnterprisePipeline } from './enterprisePipeline';
+import { processServingPipeline } from './servingPipeline';
+import type { DemandByTier } from './servingPipeline';
+import type { ResearchBonuses } from '../researchBonuses';

 export interface MarketTickResult {
  marketState: MarketState;
@@ -44,24 +48,26 @@ function getSegmentQuality(
  return weightedSum / totalWeight;
 }

-export function processMarketV2(state: GameState, currentTickCapacity: number): MarketTickResult {
+export function processMarketV2(
+  state: GameState,
+  currentTickCapacity: number,
+  effectiveInferenceFlops?: number,
+  researchBonuses?: ResearchBonuses,
+): MarketTickResult {
  const consumerQuality = getSegmentQuality('consumer', state.models.benchmarkResults, state.models.bestDeployedModelScore);
  const enterpriseQuality = getSegmentQuality('enterprise', state.models.benchmarkResults, state.models.bestDeployedModelScore);
  const modelQuality = state.models.benchmarkResults.length > 0
    ? (consumerQuality + enterpriseQuality) / 2
    : state.models.bestDeployedModelScore / 100;

-  // --- Seasonal ---
  const seasonal = computeSeasonal(state.meta.tickCount);

-  // --- Obsolescence ---
  const obsolescence = updateObsolescence(
    state.market.obsolescence,
    state.meta.currentEra,
    state.meta.tickCount,
  );

-  // --- Developer Ecosystem ---
  const freeApiDevs = state.market.apiTiers.tiers.free.developerCount;
  const totalApiDevs = state.market.apiTiers.totalDevelopers;
  const engineeringCount = state.talent.departments.engineering.headcount;
@@ -75,7 +81,6 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
    state.meta.currentEra,
  );

-  // --- TAM & Market Shares ---
  const chatProduct = state.models.productLines.find(p => p.type === 'chat-product');
  const textApi = state.models.productLines.find(p => p.type === 'text-api');

@@ -106,32 +111,7 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
  const playerDevCustomers = tam.segments.developer.shares.find(s => s.playerId === 'player')?.customers ?? 0;
  const playerEntCustomers = tam.segments.enterprise.shares.find(s => s.playerId === 'player')?.customers ?? 0;

-  // --- Consumer Tiers ---
-  const consumerDemandEstimate = state.market.consumerTiers.totalUsers * CONSUMER_TOKENS_PER_SUBSCRIBER;
-  const demandCapacityRatio = currentTickCapacity > 0
-    ? consumerDemandEstimate / currentTickCapacity
-    : consumerDemandEstimate > 0 ? 10 : 0;
-
-  const consumerResult = processConsumerTiers(
-    state.market.consumerTiers,
-    playerConsumerCustomers,
-    modelQuality,
-    seasonal.multipliers.consumer,
-    demandCapacityRatio,
-    state.infrastructure.networkLatencyPenalty,
-    state.market.overloadPolicy,
-  );
-
-  // --- API Tiers ---
-  const apiResult = processApiTiers(
-    state.market.apiTiers,
-    playerDevCustomers,
-    modelQuality,
-    seasonal.multipliers.api,
-    devEcosystem,
-  );
-
-  // --- Product Lines ---
+  // --- Product Lines (compute first to get token demand) ---
  const productResult = processProductLines(
    state.market.codeAssistant,
    state.market.agentsPlatform,
@@ -142,22 +122,103 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
    seasonal.multipliers.enterprise,
  );

-  // --- Enterprise Pipeline ---
+  // --- Pre-compute demand estimates by tier for serving pipeline ---
+  const consumerTiers = state.market.consumerTiers;
+  const apiTiers = state.market.apiTiers;
+  const enterprise = state.market.enterprise;
+
+  const consumerPaidTokens = (consumerTiers.tiers.plus.userCount + consumerTiers.tiers.pro.userCount + consumerTiers.tiers.team.userCount) * CONSUMER_TOKENS_PER_SUBSCRIBER;
+  const consumerFreeTokens = consumerTiers.tiers.free.userCount * CONSUMER_TOKENS_PER_SUBSCRIBER;
+
+  const apiPaidTokens =
+    apiTiers.tiers.payg.developerCount * API_TOKENS_PER_DEVELOPER_PER_TICK.payg
+    + apiTiers.tiers.scale.developerCount * API_TOKENS_PER_DEVELOPER_PER_TICK.scale
+    + apiTiers.tiers['enterprise-api'].developerCount * API_TOKENS_PER_DEVELOPER_PER_TICK['enterprise-api']
+    + productResult.codeAssistantTokenDemand;
+  const apiFreeTokens = apiTiers.tiers.free.developerCount * API_TOKENS_PER_DEVELOPER_PER_TICK.free;
+
+  let enterpriseTokens = 0;
+  for (const contract of enterprise.activeContracts) {
+    enterpriseTokens += contract.tokensPerTick;
+  }
+  enterpriseTokens += productResult.agentsPlatformTokenDemand;
+
+  const demandByTier: DemandByTier = {
+    'enterprise': enterpriseTokens,
+    'api-paid': apiPaidTokens,
+    'consumer-paid': consumerPaidTokens,
+    'api-free': apiFreeTokens,
+    'consumer-free': consumerFreeTokens,
+  };
+
+  // --- Batch API demand ---
+  let batchDemand = 0;
+  if (state.market.overloadPolicy.batchApiEnabled) {
+    for (const id of ['free', 'payg', 'scale', 'enterprise-api'] as const) {
+      batchDemand += apiTiers.tiers[id].developerCount * (BATCH_API_DEMAND_PER_DEV[id] ?? 0);
+    }
+    batchDemand *= Math.max(0.1, modelQuality);
+  }
+
+  const completedResearch = state.research?.completedResearch ?? [];
+
+  // --- Serving Pipeline ---
+  const servingResult = processServingPipeline({
+    modelsState: state.models,
+    effectiveInferenceFlops: effectiveInferenceFlops ?? currentTickCapacity,
+    overloadPolicy: state.market.overloadPolicy,
+    demandByTier,
+    batchApi: {
+      ...state.market.batchApi,
+      totalBatchDemand: batchDemand,
+    },
+    modelQuality,
+    researchUnlocks: {
+      servingRoutingUnlocked: completedResearch.includes('request-routing'),
+      priorityQueuesUnlocked: completedResearch.includes('priority-queues'),
+      batchApiUnlocked: completedResearch.includes('request-batching'),
+      autoScalingBonus: completedResearch.includes('auto-scaling') ? 0.2 : 0,
+    },
+  });
+
+  const sm = servingResult.servingMetrics;
+
+  // --- Consumer Tiers (now with serving metrics) ---
+  const consumerResult = processConsumerTiers(
+    state.market.consumerTiers,
+    playerConsumerCustomers,
+    modelQuality,
+    seasonal.multipliers.consumer,
+    state.infrastructure.networkLatencyPenalty,
+    sm.tierMetrics['consumer-paid'],
+    sm.tierMetrics['consumer-free'],
+  );
+
+  // --- API Tiers (now with serving metrics) ---
+  const apiResult = processApiTiers(
+    state.market.apiTiers,
+    playerDevCustomers,
+    modelQuality,
+    seasonal.multipliers.api,
+    devEcosystem,
+    sm.tierMetrics['api-paid'],
+    sm.tierMetrics['api-free'],
+  );
+
+  // --- Enterprise Pipeline (now with serving metrics) ---
  const salesDept = state.talent.departments.sales;
-  const salesHeadcount = salesDept.headcount;
-  const salesEffectiveness = salesDept.effectiveness;

  const enterpriseResult = processEnterprisePipeline(
    state.market.enterprise,
    state.reputation.score,
    state.models.bestDeployedModelScore,
    state.models.bestDeployedSafetyScore,
-    salesHeadcount,
-    salesEffectiveness,
+    salesDept.headcount,
+    salesDept.effectiveness,
    devEcosystem,
    seasonal.multipliers.enterprise,
    state.meta.tickCount,
-    demandCapacityRatio,
+    sm.tierMetrics['enterprise'],
  );

  // --- Aggregate revenue ---
@@ -165,9 +226,10 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
    + productResult.codeAssistantRevenue
    + productResult.agentsPlatformRevenue;

-  const apiRevenue = apiResult.apiRevenue
+  let apiRevenue = apiResult.apiRevenue
    + enterpriseResult.contractRevenue
-    - enterpriseResult.slaPenalties;
+    - enterpriseResult.slaPenalties
+    + servingResult.batchRevenue;

  const totalTokenDemand = consumerResult.totalConsumerTokenDemand
    + apiResult.totalApiTokenDemand
@@ -186,26 +248,7 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
  const openSourceCount = state.market.openSourcedModels.length;
  if (openSourceCount > 0) {
    const revenueReduction = openSourceCount * 0.10 * 0.3;
-    const adjustedApiRevenue = apiRevenue * (1 - revenueReduction);
-    return {
-      marketState: {
-        ...state.market,
-        tam,
-        consumerTiers: consumerResult.consumerTiers,
-        apiTiers: apiResult.apiTiers,
-        codeAssistant: productResult.codeAssistant,
-        agentsPlatform: productResult.agentsPlatform,
-        enterprise: enterpriseResult.enterprise,
-        developerEcosystem: devEcosystem,
-        seasonalPhase: seasonal.phase,
-        seasonalMultiplier: seasonal.multipliers.consumer,
-        obsolescence,
-        subscriberHistory,
-      },
-      apiRevenue: Math.max(0, adjustedApiRevenue),
-      subscriptionRevenue,
-      totalTokenDemand,
-    };
+    apiRevenue = apiRevenue * (1 - revenueReduction);
  }

  return {
@@ -221,6 +264,8 @@ export function processMarketV2(state: GameState, currentTickCapacity: number):
      seasonalPhase: seasonal.phase,
      seasonalMultiplier: seasonal.multipliers.consumer,
      obsolescence,
+      servingMetrics: sm,
+      batchApi: servingResult.batchApi,
      subscriberHistory,
    },
    apiRevenue: Math.max(0, apiRevenue),
@@ -0,0 +1,462 @@
+import type {
+  OverloadPolicy,
+  TrafficPriority,
+  TierServingMetrics,
+  ServingMetrics,
+  ModelUtilizationEntry,
+  BatchApiState,
+} from '@ai-tycoon/shared';
+import type { BaseModel, ModelVariant, ModelFamily, ModelsState, SizeTier } from '@ai-tycoon/shared';
+import {
+  MODEL_SIZE_THROUGHPUT_SCALER,
+  MOE_SPEED_MULTIPLIER,
+  FLOPS_TO_TOKENS_MULTIPLIER,
+  QUANTIZATION_CONFIGS,
+  REJECTION_SATISFACTION_PENALTY,
+  QUEUE_SATISFACTION_PENALTY,
+  DEGRADATION_SATISFACTION_PENALTY,
+  BASE_LATENCY_MS,
+  QUEUE_LATENCY_MS_PER_PERCENT,
+  BATCH_API_MAX_PENDING,
+} from '@ai-tycoon/shared';
+import { makeInitialServingMetrics } from '@ai-tycoon/shared';
+
+/**
+ * One deployed model (base model or variant) as seen by the serving pipeline.
+ * Slots are produced by `buildModelFleet` and consumed by the routing/serving
+ * helpers in this module.
+ */
+export interface ModelServingSlot {
+  modelId: string; // id of the base model or variant; also the key into the FleetState maps
+  modelName: string;
+  sizeTier: SizeTier; // variants take the size tier of their base model
+  isVariant: boolean; // false for base models, true for deployed variants
+  quantization: string | null; // variant quantization, or null (always null for base models)
+  qualityScore: number; // rawCapability / 100, scaled by quantization quality retention for variants
+  speedMultiplier: number; // MoE factor times quantization speed factor (size-tier scaling is not included)
+  throughputCapacity: number; // tokens this slot can serve, in the same units as tier demand
+  isMoE: boolean; // true when the model/variant architecture type is 'moe'
+}
+
+/**
+ * Token demand for each traffic tier, keyed by tier id.
+ * The keys are iterated as `TrafficPriority` values by the serving pipeline.
+ */
+export interface DemandByTier {
+  enterprise: number;
+  'api-paid': number;
+  'consumer-paid': number;
+  'api-free': number;
+  'consumer-free': number;
+}
+
+/** Everything `processServingPipeline` needs to simulate one pass of request serving. */
+export interface ServingPipelineInput {
+  modelsState: ModelsState; // source of deployed base models and deployed family variants
+  effectiveInferenceFlops: number; // inference compute, split evenly across every deployed model/variant
+  overloadPolicy: OverloadPolicy; // routing strategy, enterprise reservation, priority order, rate limits, overflow and auto-degradation settings
+  demandByTier: DemandByTier; // token demand per traffic tier
+  batchApi: BatchApiState; // Batch API state carried into the pipeline; an updated copy is returned in the result
+  modelQuality: number; // NOTE(review): destructured by the pipeline, but its use is not visible in this chunk
+  researchUnlocks: {
+    servingRoutingUnlocked: boolean; // when false, routing is forced to 'balanced' and per-tier rate limits are not applied
+    priorityQueuesUnlocked: boolean; // when false, the built-in enterprise-first tier order is used instead of the policy's
+    batchApiUnlocked: boolean; // NOTE(review): presumably gates Batch API serving; consumer not visible in this chunk
+    autoScalingBonus: number; // NOTE(review): presumably a fractional capacity bonus; consumer not visible in this chunk
+  };
+}
+
+/** Output of `processServingPipeline`. */
+export interface ServingPipelineResult {
+  servingMetrics: ServingMetrics; // aggregate totals plus per-tier serving metrics (tierMetrics)
+  batchApi: BatchApiState; // Batch API state after the pass (servedLastTick / revenue are reset to 0 when there is no fleet capacity)
+  batchRevenue: number; // revenue earned from Batch API traffic; 0 when there is no fleet capacity
+}
+
+/**
+ * Builds the list of serving slots for every deployed base model and every
+ * deployed variant whose base model can still be found.
+ *
+ * The inference budget is divided evenly between all deployed entries; each
+ * entry's share is then converted to token throughput and scaled by its size
+ * tier, MoE architecture and (variants only) quantization speed-up.
+ *
+ * @param modelsState - Models state holding base models and families of variants.
+ * @param effectiveInferenceFlops - Total inference compute to share across the fleet.
+ * @returns Base-model slots followed by variant slots; empty when nothing is
+ *   deployed or there is no inference compute.
+ */
+function buildModelFleet(
+  modelsState: ModelsState,
+  effectiveInferenceFlops: number,
+): ModelServingSlot[] {
+  const liveBases = modelsState.baseModels.filter(candidate => candidate.isDeployed);
+
+  // Pair each deployed variant with its base model; orphaned variants are dropped.
+  const liveVariants: { variant: ModelVariant; baseModel: BaseModel }[] = [];
+  modelsState.families.forEach(family => {
+    family.variants.forEach(variant => {
+      if (!variant.isDeployed) return;
+      const parent = modelsState.baseModels.find(candidate => candidate.id === variant.baseModelId);
+      if (parent) liveVariants.push({ variant, baseModel: parent });
+    });
+  });
+
+  const deployedCount = liveBases.length + liveVariants.length;
+  if (deployedCount === 0 || effectiveInferenceFlops <= 0) return [];
+
+  // Every deployed entry receives the same share of compute before scaling.
+  const flopsShare = effectiveInferenceFlops / deployedCount;
+  const tokenBudget = flopsShare * FLOPS_TO_TOKENS_MULTIPLIER;
+
+  const baseSlots = liveBases.map((model): ModelServingSlot => {
+    const moe = model.architecture.type === 'moe';
+    const sizeScale = MODEL_SIZE_THROUGHPUT_SCALER[model.sizeTier] ?? 1.0;
+    const moeScale = moe ? MOE_SPEED_MULTIPLIER : 1.0;
+
+    return {
+      modelId: model.id,
+      modelName: model.name,
+      sizeTier: model.sizeTier,
+      isVariant: false,
+      quantization: null,
+      qualityScore: model.rawCapability / 100,
+      speedMultiplier: moeScale,
+      throughputCapacity: tokenBudget * sizeScale * moeScale,
+      isMoE: moe,
+    };
+  });
+
+  const variantSlots = liveVariants.map(({ variant, baseModel }): ModelServingSlot => {
+    const moe = variant.architecture.type === 'moe';
+    const sizeScale = MODEL_SIZE_THROUGHPUT_SCALER[baseModel.sizeTier] ?? 1.0;
+    const moeScale = moe ? MOE_SPEED_MULTIPLIER : 1.0;
+    // Quantization trades delivered quality for extra speed.
+    const quant = variant.quantization ? QUANTIZATION_CONFIGS[variant.quantization] : null;
+    const quantSpeed = quant?.speedMultiplier ?? 1.0;
+    const quantQuality = quant?.qualityRetention ?? 1.0;
+
+    return {
+      modelId: variant.id,
+      modelName: variant.name,
+      sizeTier: baseModel.sizeTier,
+      isVariant: true,
+      quantization: variant.quantization ?? null,
+      qualityScore: (baseModel.rawCapability / 100) * quantQuality,
+      speedMultiplier: moeScale * quantSpeed,
+      throughputCapacity: tokenBudget * sizeScale * moeScale * quantSpeed,
+      isMoE: moe,
+    };
+  });
+
+  return [...baseSlots, ...variantSlots];
+}
+
+/**
+ * Returns a sorted copy of the fleet in the order slots should be tried.
+ *
+ * - `'quality-first'`: highest quality score first.
+ * - `'speed-first'`: highest throughput capacity first.
+ * - anything else (including `'balanced'`): throughput-first once overall
+ *   utilization exceeds 0.8, quality-first otherwise.
+ *
+ * The input array is never mutated.
+ */
+function sortFleetByStrategy(
+  fleet: ModelServingSlot[],
+  strategy: string,
+  overallUtilization: number,
+): ModelServingSlot[] {
+  const qualityDescending = (left: ModelServingSlot, right: ModelServingSlot) =>
+    right.qualityScore - left.qualityScore;
+  const throughputDescending = (left: ModelServingSlot, right: ModelServingSlot) =>
+    right.throughputCapacity - left.throughputCapacity;
+
+  let favourThroughput: boolean;
+  if (strategy === 'quality-first') {
+    favourThroughput = false;
+  } else if (strategy === 'speed-first') {
+    favourThroughput = true;
+  } else {
+    // Balanced / unknown strategy: only chase throughput when under heavy load.
+    favourThroughput = overallUtilization > 0.8;
+  }
+
+  return [...fleet].sort(favourThroughput ? throughputDescending : qualityDescending);
+}
+
+/** Mutable capacity ledger for a fleet; both maps are keyed by slot `modelId`. */
+interface FleetState {
+  remaining: Map<string, number>; // capacity still available on each slot
+  used: Map<string, number>; // capacity already consumed on each slot
+}
+
+/**
+ * Serves one tier's demand from the fleet, mutating `fleetState` as capacity
+ * is consumed.
+ *
+ * Pass 1 walks the fleet in the given order. Slots whose quality is below 95%
+ * of the best slot count as "degraded" and are only used when auto-degradation
+ * is enabled, overall utilization is above its trigger threshold, and the slot
+ * is not below the policy's minimum quality floor.
+ *
+ * Whatever is still unmet is handled by the tier's overflow behavior:
+ * `'queue'` queues it, `'reject'` rejects it, and `'degrade'` makes a second
+ * pass over every slot with capacity left (counting all of it as degraded)
+ * and rejects the rest.
+ *
+ * @param demand - Tokens requested by this tier.
+ * @param fleet - Slots in the order they should be tried.
+ * @param fleetState - Remaining/used capacity ledger; updated in place.
+ * @param policy - Overload policy (auto-degradation and overflow settings).
+ * @param tier - Tier being served; selects the overflow behavior.
+ * @param overallUtilization - Fleet-wide demand/capacity ratio.
+ * @returns Serving metrics for the tier. Average quality falls back to the
+ *   best fleet quality when nothing was served, and to 1 when demand is zero.
+ */
+function serveFromFleet(
+  demand: number,
+  fleet: ModelServingSlot[],
+  fleetState: FleetState,
+  policy: OverloadPolicy,
+  tier: TrafficPriority,
+  overallUtilization: number,
+): TierServingMetrics {
+  if (demand <= 0) {
+    return { demandTokens: 0, servedTokens: 0, queuedTokens: 0, rejectedTokens: 0, degradedTokens: 0, avgQualityDelivered: 1 };
+  }
+
+  let unmet = demand;
+  let servedTokens = 0;
+  let degradedTokens = 0;
+  let qualityTotal = 0;
+
+  const topQuality = fleet.length > 0 ? Math.max(...fleet.map(s => s.qualityScore)) : 1;
+  const degradationActive = policy.autoDegradation.enabled && overallUtilization > policy.autoDegradation.triggerThreshold;
+
+  // Pulls as much of the unmet demand as the slot can still supply.
+  const drawFrom = (slot: ModelServingSlot, countAsDegraded: boolean): void => {
+    const free = fleetState.remaining.get(slot.modelId) ?? 0;
+    if (free <= 0) return;
+
+    const amount = Math.min(unmet, free);
+    fleetState.remaining.set(slot.modelId, free - amount);
+    fleetState.used.set(slot.modelId, (fleetState.used.get(slot.modelId) ?? 0) + amount);
+
+    servedTokens += amount;
+    if (countAsDegraded) degradedTokens += amount;
+    qualityTotal += amount * slot.qualityScore;
+    unmet -= amount;
+  };
+
+  // Pass 1: normal routing, honouring the auto-degradation rules.
+  for (const slot of fleet) {
+    if (unmet <= 0) break;
+
+    const belowTop = slot.qualityScore < topQuality * 0.95;
+    if (belowTop && (!degradationActive || slot.qualityScore < policy.autoDegradation.minQualityFloor)) {
+      continue;
+    }
+    drawFrom(slot, belowTop);
+  }
+
+  let queuedTokens = 0;
+  let rejectedTokens = 0;
+
+  if (unmet > 0) {
+    const behavior = policy.overflowBehavior[tier];
+    if (behavior === 'queue') {
+      queuedTokens = unmet;
+    } else if (behavior === 'reject') {
+      rejectedTokens = unmet;
+    } else if (behavior === 'degrade') {
+      // Pass 2: take any capacity that is left, regardless of quality.
+      for (const slot of fleet) {
+        if (unmet <= 0) break;
+        drawFrom(slot, true);
+      }
+      rejectedTokens = unmet;
+    }
+  }
+
+  return {
+    demandTokens: demand,
+    servedTokens,
+    queuedTokens,
+    rejectedTokens,
+    degradedTokens,
+    avgQualityDelivered: servedTokens > 0 ? qualityTotal / servedTokens : topQuality,
+  };
+}
+
+export function processServingPipeline(input: ServingPipelineInput): ServingPipelineResult {
+  const { modelsState, effectiveInferenceFlops, overloadPolicy, demandByTier, batchApi, modelQuality, researchUnlocks } = input;
+
+  const fleet = buildModelFleet(modelsState, effectiveInferenceFlops);
+  const totalFleetCapacity = fleet.reduce((sum, s) => sum + s.throughputCapacity, 0);
+
+  if (fleet.length === 0 || totalFleetCapacity <= 0) {
+    const metrics = makeInitialServingMetrics();
+    for (const tier of Object.keys(demandByTier) as TrafficPriority[]) {
+      const demand = demandByTier[tier] ?? 0;
+      if (demand > 0) {
+        metrics.tierMetrics[tier] = {
+          demandTokens: demand,
+          servedTokens: 0,
+          queuedTokens: 0,
+          rejectedTokens: demand,
+          degradedTokens: 0,
+          avgQualityDelivered: 0,
+        };
+        metrics.totalRejected += demand;
+      }
+    }
+    return {
+      servingMetrics: metrics,
+      batchApi: { ...batchApi, servedLastTick: 0, revenue: 0 },
+      batchRevenue: 0,
+    };
+  }
+
+  const totalDemand = Object.values(demandByTier).reduce((s, v) => s + v, 0);
+  const overallUtilization = totalFleetCapacity > 0 ? totalDemand / totalFleetCapacity : 0;
+
+  const effectiveStrategy = researchUnlocks.servingRoutingUnlocked
+    ? overloadPolicy.routingStrategy
+    : 'balanced';
+
+  const sortedFleet = sortFleetByStrategy(fleet, effectiveStrategy, overallUtilization);
+
+  const fleetState: FleetState = {
+    remaining: new Map(fleet.map(s => [s.modelId, s.throughputCapacity])),
+    used: new Map(fleet.map(s => [s.modelId, 0])),
+  };
+
+  const reservedCapacity = totalFleetCapacity * overloadPolicy.enterpriseReservation;
+  const enterpriseDemand = demandByTier['enterprise'] ?? 0;
+
+  if (reservedCapacity > 0 && enterpriseDemand > 0) {
+    const reservePerModel = reservedCapacity / fleet.length;
+    for (const slot of sortedFleet) {
+      const current = fleetState.remaining.get(slot.modelId) ?? 0;
+      const reserved = Math.min(reservePerModel, current);
+      fleetState.remaining.set(slot.modelId, current - reserved);
+    }
+  }
+
+  const effectivePriorityOrder = researchUnlocks.priorityQueuesUnlocked
+    ? overloadPolicy.priorityOrder
+    : ['enterprise', 'api-paid', 'consumer-paid', 'api-free', 'consumer-free'] as TrafficPriority[];
+
+  const tierResults: Record<TrafficPriority, TierServingMetrics> = {} as Record<TrafficPriority, TierServingMetrics>;
+
+  const nonEnterpriseTiers = effectivePriorityOrder.filter(t => t !== 'enterprise');
+
+  if (enterpriseDemand > 0) {
+    const enterpriseFleetState: FleetState = {
+      remaining: new Map(fleet.map(s => [s.modelId, s.throughputCapacity])),
+      used: new Map(fleet.map(s => [s.modelId, 0])),
+    };
+
+    const reserveLimit = reservedCapacity > 0 ? reservedCapacity : totalFleetCapacity;
+    let budgetLeft = reserveLimit;
+    for (const slot of sortedFleet) {
+      const cap = slot.throughputCapacity;
+      const alloc = Math.min(cap, budgetLeft);
+      enterpriseFleetState.remaining.set(slot.modelId, alloc);
+      budgetLeft -= alloc;
+      if (budgetLeft <= 0) break;
+    }
+
+    const effectiveEntDemand = researchUnlocks.servingRoutingUnlocked
+      ? Math.min(enterpriseDemand, overloadPolicy.rateLimitPerCustomer['enterprise'] * 100)
+      : enterpriseDemand;
+
+    tierResults['enterprise'] = serveFromFleet(
+      effectiveEntDemand, sortedFleet, enterpriseFleetState, overloadPolicy, 'enterprise', overallUtilization,
+    );
+
+    for (const slot of fleet) {
+      const entUsed = enterpriseFleetState.used.get(slot.modelId) ?? 0;
+      const mainRemaining = fleetState.remaining.get(slot.modelId) ?? 0;
+      fleetState.remaining.set(slot.modelId, Math.max(0, mainRemaining - entUsed + (reservedCapacity > 0 ? reservedCapacity / fleet.length : 0)));
+      fleetState.used.set(slot.modelId, entUsed);
+    }
+  } else {
+    tierResults['enterprise'] = { demandTokens: 0, servedTokens: 0, queuedTokens: 0, rejectedTokens: 0, degradedTokens: 0, avgQualityDelivered: 1 };
+
+    if (reservedCapacity > 0) {
+      const reservePerModel = reservedCapacity / fleet.length;
+      for (const slot of fleet) {
+        const current = fleetState.remaining.get(slot.modelId) ?? 0;
+        fleetState.remaining.set(slot.modelId, current + reservePerModel);
+      }
+    }
+  }
+
+  for (const tier of nonEnterpriseTiers) {
+    const rawDemand = demandByTier[tier] ?? 0;
+    const effectiveDemand = researchUnlocks.servingRoutingUnlocked
+      ? Math.min(rawDemand, overloadPolicy.rateLimitPerCustomer[tier] * 100)
+      : rawDemand;
+
+    tierResults[tier] = serveFromFleet(
+      effectiveDemand, sortedFleet, fleetState, overloadPolicy, tier, overallUtilization,
+    );
+  }
+
+  for (const tier of effectivePriorityOrder) {
+    if (!(tier in tierResults)) {
+      tierResults[tier] = { demandTokens: 0, servedTokens: 0, queuedTokens: 0, rejectedTokens: 0, degradedTokens: 0, avgQualityDelivered: 1 };
+    }
+  }
+
+  let batchTokensServed = 0;
+  let batchRevenue = 0;
+  const updatedBatchApi = { ...batchApi };
+
+  if (overloadPolicy.batchApiEnabled && researchUnlocks.batchApiUnlocked) {
+    let idleCapacity = 0;
+    for (const slot of fleet) {
+      const remaining = fleetState.remaining.get(slot.modelId) ?? 0;
+      idleCapacity += remaining;
+    }
+
+    const pendingBatch = Math.min(batchApi.pendingQueue + batchApi.totalBatchDemand, BATCH_API_MAX_PENDING);
+    batchTokensServed = Math.min(pendingBatch, idleCapacity);
+
+    const baseTokenPrice = 3.0;
+    batchRevenue = (batchTokensServed / 1_000_000) * baseTokenPrice * (1 - overloadPolicy.batchApiDiscount);
+
+    updatedBatchApi.pendingQueue = Math.max(0, pendingBatch - batchTokensServed);
+    updatedBatchApi.servedLastTick = batchTokensServed;
+    updatedBatchApi.revenue = batchRevenue;
+  }
+
+  const totalServed = Object.values(tierResults).reduce((s, t) => s + t.servedTokens, 0);
+  const totalQueued = Object.values(tierResults).reduce((s, t) => s + t.queuedTokens, 0);
+  const totalRejected = Object.values(tierResults).reduce((s, t) => s + t.rejectedTokens, 0);
+  const totalDegraded = Object.values(tierResults).reduce((s, t) => s + t.degradedTokens, 0);
+
+  let effectiveQuality = modelQuality;
+  if (totalServed > 0) {
+    let qualitySum = 0;
+    for (const t of Object.values(tierResults)) {
+      qualitySum += t.avgQualityDelivered * t.servedTokens;
+    }
+    effectiveQuality = qualitySum / totalServed;
+  }
+
+  const queuedFraction = totalDemand > 0 ? totalQueued / totalDemand : 0;
+  const avgLatencyMs = BASE_LATENCY_MS + queuedFraction * 100 * QUEUE_LATENCY_MS_PER_PERCENT;
+
+  const modelUtilization: ModelUtilizationEntry[] = fleet.map(slot => ({
+    modelId: slot.modelId,
+    modelName: slot.modelName,
+    quantization: slot.quantization,
+    qualityScore: slot.qualityScore,
+    throughputCapacity: slot.throughputCapacity,
+    throughputUsed: fleetState.used.get(slot.modelId) ?? 0,
+    utilization: slot.throughputCapacity > 0
+      ? Math.min(1, (fleetState.used.get(slot.modelId) ?? 0) / slot.throughputCapacity)
+      : 0,
+  }));
+
+  const autoScaleBoost = researchUnlocks.autoScalingBonus;
+  if (autoScaleBoost > 0) {
+    for (const tier of Object.keys(tierResults) as TrafficPriority[]) {
+      const metrics = tierResults[tier];
+      if (metrics.rejectedTokens > 0) {
+        const recovered = Math.min(metrics.rejectedTokens, metrics.rejectedTokens * autoScaleBoost);
+        tierResults[tier] = {
+          ...metrics,
+          servedTokens: metrics.servedTokens + recovered,
+          rejectedTokens: metrics.rejectedTokens - recovered,
+        };
+      }
+    }
+  }
+
+  return {
+    servingMetrics: {
+      tierMetrics: tierResults,
+      totalServed,
+      totalQueued,
+      totalRejected,
+      totalDegraded,
+      effectiveQuality,
+      avgLatencyMs,
+      modelUtilization,
+      batchApiTokensServed: batchTokensServed,
+      batchApiRevenue: batchRevenue,
+    },
+    batchApi: updatedBatchApi,
+    batchRevenue,
+  };
+}
+
+/**
+ * Turns one tier's serving outcome into a satisfaction delta.
+ *
+ * Three penalties are summed and negated: rejected share of demand, queued
+ * share of demand, and degraded share of served traffic scaled by the quality
+ * shortfall (1 - avgQualityDelivered). Each is multiplied by its
+ * *_SATISFACTION_PENALTY weight and a shared factor of 10.
+ *
+ * @param metrics Serving outcome for a single traffic tier.
+ * @returns 0 when the tier had no demand, otherwise the negated penalty sum.
+ */
+export function computeSatisfactionImpact(
+  metrics: TierServingMetrics,
+): number {
+  const {
+    demandTokens,
+    servedTokens,
+    queuedTokens,
+    rejectedTokens,
+    degradedTokens,
+    avgQualityDelivered,
+  } = metrics;
+
+  // A tier that asked for nothing has nothing to be unhappy about.
+  if (demandTokens <= 0) {
+    return 0;
+  }
+
+  // Shared shape of every penalty: share * weight * 10.
+  const weigh = (share: number, weight: number): number => share * weight * 10;
+
+  // Degradation is measured against served traffic, so guard the empty case.
+  let degradedShare = 0;
+  if (servedTokens > 0) {
+    degradedShare = degradedTokens / servedTokens;
+  }
+
+  const fromRejections = weigh(rejectedTokens / demandTokens, REJECTION_SATISFACTION_PENALTY);
+  const fromQueueing = weigh(queuedTokens / demandTokens, QUEUE_SATISFACTION_PENALTY);
+  const fromDegradation = weigh(degradedShare * (1 - avgQualityDelivered), DEGRADATION_SATISFACTION_PENALTY);
+
+  return -(fromRejections + fromQueueing + fromDegradation);
+}
@@ -1,8 +1,9 @@
 import type { GameState } from '@ai-tycoon/shared';
 import { processMarketV2 } from './market/index';
+import type { ResearchBonuses } from './researchBonuses';

 export type { MarketTickResult } from './market/index';

-export function processMarket(state: GameState, currentTickCapacity: number) {
-  return processMarketV2(state, currentTickCapacity);
+export function processMarket(state: GameState, currentTickCapacity: number, effectiveInferenceFlops?: number, researchBonuses?: ResearchBonuses) {
+  return processMarketV2(state, currentTickCapacity, effectiveInferenceFlops, researchBonuses); // thin wrapper: all four arguments are forwarded unchanged; the two new ones are optional so existing two-argument callers still compile
 }
@@ -18,6 +18,7 @@ export interface ResearchBonuses {
  reputationBonus: number;

  safetyBonus: number;
+  autoScalingBonus: number;
 }

 export function getResearchBonuses(completedResearch: string[]): ResearchBonuses {
@@ -37,6 +38,7 @@ export function getResearchBonuses(completedResearch: string[]): ResearchBonuses
    agentsBonus: 0,
    reputationBonus: 0,
    safetyBonus: 0,
+    autoScalingBonus: 0,
  };

  for (const id of completedResearch) {
@@ -53,6 +55,7 @@ export function getResearchBonuses(completedResearch: string[]): ResearchBonuses
            case 'pipeline_speed': bonuses.pipelineSpeedBonus += effect.value; break;
            case 'data_quality': bonuses.dataQualityBonus += effect.value; break;
            case 'sdk_coverage': bonuses.sdkCoverageBonus += effect.value; break;
+            case 'auto_scaling': bonuses.autoScalingBonus += effect.value; break;
          }
          break;
        case 'capability_boost':
@@ -56,7 +56,7 @@ export function processTick(state: GameState): Partial<GameState> {
  const stateWithModels = { ...stateWithInfra, models: modelResult.modelsState };

  const capacity = computeCapacity(state, infrastructure, researchBonuses);
-  const market = processMarket(stateWithModels, capacity.tokensPerSecondCapacity);
+  const market = processMarket(stateWithModels, capacity.tokensPerSecondCapacity, capacity.effectiveInferenceFlops, researchBonuses);
  const compute = finalizeCompute(capacity, market.totalTokenDemand);

  const talent = processTalent(stateWithModels);
@@ -118,6 +118,34 @@ export const FLOPS_TO_TOKENS_MULTIPLIER = 26;

 export const OVERLOAD_PENALTY_EXPONENT = 1.5;

+// --- Serving Pipeline ---
+
+export const REJECTION_SATISFACTION_PENALTY = 0.15; // weight on a tier's rejected share of demand in computeSatisfactionImpact (scaled x10 there)
+export const QUEUE_SATISFACTION_PENALTY = 0.05; // weight on a tier's queued share of demand in computeSatisfactionImpact (scaled x10 there)
+export const DEGRADATION_SATISFACTION_PENALTY = 0.08; // weight on degraded share of served tokens times quality shortfall in computeSatisfactionImpact (scaled x10 there)
+
+export const REJECTION_CHURN_MULTIPLIER = 3.0;
+export const QUEUE_CHURN_MULTIPLIER = 1.5;
+
+export const ENTERPRISE_REJECTION_SLA_MULTIPLIER = 3.0;
+
+export const FREE_TIER_REJECTION_TOLERANCE = 0.3;
+export const PAID_TIER_REJECTION_TOLERANCE = 0.05;
+
+export const MODEL_SIZE_THROUGHPUT_SCALER: Record<SizeTier, number> = { // NOTE(review): presumably a relative throughput multiplier per model size tier; the consumer is not visible here, confirm
+  nano: 10.0, small: 5.0, medium: 2.0, large: 1.2, flagship: 1.0,
+};
+
+export const BATCH_API_DEMAND_PER_DEV: Record<ApiTierId, number> = { // NOTE(review): presumably batch demand generated per developer on each API tier; units not visible here, confirm
+  free: 0, payg: 2, scale: 20, 'enterprise-api': 100,
+};
+export const BATCH_API_DEFAULT_DISCOUNT = 0.5; // same value as the initial overloadPolicy.batchApiDiscount, which is written as a literal rather than referencing this constant
+export const BATCH_API_MAX_PENDING = 100_000; // cap applied to (pendingQueue + totalBatchDemand) before batch serving; backlog beyond it is dropped
+
+export const BATCHING_THROUGHPUT_FACTOR = 0.15;
+export const BASE_LATENCY_MS = 50; // avgLatencyMs when nothing is queued
+export const QUEUE_LATENCY_MS_PER_PERCENT = 5; // latency added per percentage point of total demand that ended up queued
+
 export const ERA_THRESHOLDS = {
  scaleup: { revenue: 10_000, capability: 15, reputation: 30 },
  bigtech: { revenue: 1_000_000, capability: 50, reputation: 60 },
@@ -52,4 +52,4 @@ export const INITIAL_SETTINGS: GameSettings = {
  musicVolume: 0.5,
 };

-export const SAVE_VERSION = 8;
+export const SAVE_VERSION = 9;
@@ -170,13 +170,93 @@ export interface ObsolescenceState {
  newModelBoostRemaining: number;
 }

-// --- Overload Policy (kept from original) ---
+// --- Serving Pipeline & Overload Policy ---
+
+export type TrafficPriority = 'enterprise' | 'api-paid' | 'consumer-paid' | 'api-free' | 'consumer-free'; // the five demand tiers; also the key set of every per-tier Record in this file
+export type RoutingStrategy = 'quality-first' | 'speed-first' | 'balanced'; // handed to sortFleetByStrategy to order the model fleet; 'balanced' is forced while routing research is locked
+export type OverflowBehavior = 'queue' | 'reject' | 'degrade'; // per-tier choice for demand that does not fit; NOTE(review): applied inside serveFromFleet, which is not visible here
+
+export const TRAFFIC_PRIORITIES: TrafficPriority[] = ['enterprise', 'api-paid', 'consumer-paid', 'api-free', 'consumer-free']; // every tier, listed in the same order as the default priorityOrder

 export interface OverloadPolicy {
+  priorityOrder: TrafficPriority[]; // order in which tiers are served; only honoured once priority-queue research is unlocked, otherwise a fixed default order is used
+  overflowBehavior: Record<TrafficPriority, OverflowBehavior>; // NOTE(review): consumed inside serveFromFleet (not visible here), confirm exact semantics
   maxQueueDepth: number;
-  rateLimitPerCustomer: number;
-  degradeQualityUnderLoad: boolean;
-  prioritizeEnterprise: boolean;
+  rateLimitPerCustomer: Record<TrafficPriority, number>; // with routing research unlocked, each tier's demand is capped at this value * 100 before serving
+  enterpriseReservation: number; // fraction of total fleet capacity set aside for the enterprise tier (multiplied by totalFleetCapacity)
+  routingStrategy: RoutingStrategy; // fleet ordering; ignored in favour of 'balanced' until routing research is unlocked
+  autoDegradation: { // NOTE(review): read inside serveFromFleet (not visible here); field meanings below are inferred from names, confirm
+    enabled: boolean;
+    triggerThreshold: number;
+    minQualityFloor: number;
+  };
+  batchApiEnabled: boolean; // batch serving runs only when this is true AND batch research is unlocked
+  batchApiDiscount: number; // fraction taken off the base token price: batch revenue is multiplied by (1 - batchApiDiscount)
+  batchApiMaxDelay: number; // NOTE(review): not read by the serving code visible in this chunk, confirm it is used
+}
+
+export interface TierServingMetrics { // outcome of serving a single traffic tier
+  demandTokens: number; // denominator for the rejected and queued fractions in computeSatisfactionImpact
+  servedTokens: number; // also the weight of this tier in ServingMetrics.effectiveQuality; grows by any rejected tokens recovered through the auto-scaling bonus
+  queuedTokens: number; // summed across tiers into totalQueued, which drives avgLatencyMs
+  rejectedTokens: number; // reduced by the auto-scaling bonus after the initial serving pass
+  degradedTokens: number; // measured against servedTokens (not demandTokens) in computeSatisfactionImpact
+  avgQualityDelivered: number; // 1 for a tier with no traffic; NOTE(review): presumably on a 0..1 scale since (1 - value) is used as the quality shortfall, confirm
+}
+
+export interface ModelUtilizationEntry { // per-model row of the fleet utilization report; one entry is produced per fleet slot
+  modelId: string; // key used for the fleet's remaining/used capacity maps
+  modelName: string;
+  quantization: string | null; // NOTE(review): null presumably means the model is not quantized, confirm
+  qualityScore: number;
+  throughputCapacity: number; // the slot's full capacity, copied from the fleet slot
+  throughputUsed: number; // capacity recorded as used for this model, 0 if the model has no entry
+  utilization: number; // throughputUsed / throughputCapacity, clamped to at most 1; 0 when capacity is not positive
+}
+
+export interface ServingMetrics { // aggregate result of one pass through the serving pipeline
+  tierMetrics: Record<TrafficPriority, TierServingMetrics>; // per-tier breakdown; every tier is present, zero-filled when it had no traffic
+  totalServed: number; // sum of servedTokens over tiers; NOTE(review): taken before the auto-scaling recovery adjusts tierMetrics, so it can differ from the per-tier sum
+  totalQueued: number; // sum of queuedTokens over tiers
+  totalRejected: number; // sum of rejectedTokens over tiers; NOTE(review): also taken before the auto-scaling recovery
+  totalDegraded: number; // sum of degradedTokens over tiers
+  effectiveQuality: number; // served-token-weighted mean of tier avgQualityDelivered; falls back to the model quality input when nothing was served
+  avgLatencyMs: number; // BASE_LATENCY_MS + (totalQueued / total demand) * 100 * QUEUE_LATENCY_MS_PER_PERCENT
+  modelUtilization: ModelUtilizationEntry[]; // one entry per fleet slot
+  batchApiTokensServed: number; // batch tokens served from capacity left idle after all tiers
+  batchApiRevenue: number; // revenue from those batch tokens after the batch discount
+}
+
+export interface BatchApiState { // Batch API backlog and last-pass results carried between serving passes
+  totalBatchDemand: number; // new batch demand added to pendingQueue at the start of batch serving
+  pendingQueue: number; // backlog left unserved; (pendingQueue + totalBatchDemand) is capped at BATCH_API_MAX_PENDING before serving
+  servedLastTick: number; // batch tokens served in the most recent pass: min(pending backlog, idle fleet capacity)
+  revenue: number; // revenue from servedLastTick after applying the batch discount
+}
+
+/** Builds a fresh zeroed metrics record for a tier with no traffic; delivered quality starts at 1. */
+function makeEmptyTierMetrics(): TierServingMetrics {
+  const blank: TierServingMetrics = {
+    demandTokens: 0,
+    servedTokens: 0,
+    queuedTokens: 0,
+    rejectedTokens: 0,
+    degradedTokens: 0,
+    avgQualityDelivered: 1,
+  };
+  return blank;
+}
+
+/**
+ * Builds the serving metrics used before any serving pass has run: every
+ * tier gets its own zeroed record, all totals are 0, quality is 1, and the
+ * model utilization list is empty.
+ */
+export function makeInitialServingMetrics(): ServingMetrics {
+  // Each tier needs its own object so later writes never alias another tier.
+  const tierMetrics: Record<TrafficPriority, TierServingMetrics> = {
+    'enterprise': makeEmptyTierMetrics(),
+    'api-paid': makeEmptyTierMetrics(),
+    'consumer-paid': makeEmptyTierMetrics(),
+    'api-free': makeEmptyTierMetrics(),
+    'consumer-free': makeEmptyTierMetrics(),
+  };
+
+  const initial: ServingMetrics = {
+    tierMetrics,
+    totalServed: 0,
+    totalQueued: 0,
+    totalRejected: 0,
+    totalDegraded: 0,
+    effectiveQuality: 1,
+    avgLatencyMs: 0,
+    modelUtilization: [],
+    batchApiTokensServed: 0,
+    batchApiRevenue: 0,
+  };
+  return initial;
 }

 // --- Root Market State ---
@@ -193,6 +273,8 @@ export interface MarketState {
  seasonalMultiplier: number;
  obsolescence: ObsolescenceState;
  overloadPolicy: OverloadPolicy;
+  servingMetrics: ServingMetrics;
+  batchApi: BatchApiState;
  openSourcedModels: string[];
  subscriberHistory: { tick: number; subscribers: number }[];
 }
@@ -315,10 +397,39 @@ export const INITIAL_MARKET: MarketState = {
    newModelBoostRemaining: 0,
  },
  overloadPolicy: {
+    priorityOrder: ['enterprise', 'api-paid', 'consumer-paid', 'api-free', 'consumer-free'],
+    overflowBehavior: {
+      'enterprise': 'queue' as OverflowBehavior,
+      'api-paid': 'queue' as OverflowBehavior,
+      'consumer-paid': 'degrade' as OverflowBehavior,
+      'api-free': 'reject' as OverflowBehavior,
+      'consumer-free': 'reject' as OverflowBehavior,
+    },
    maxQueueDepth: 100,
-    rateLimitPerCustomer: 1000,
-    degradeQualityUnderLoad: false,
-    prioritizeEnterprise: true,
+    rateLimitPerCustomer: {
+      'enterprise': 10000,
+      'api-paid': 1000,
+      'consumer-paid': 500,
+      'api-free': 100,
+      'consumer-free': 50,
+    },
+    enterpriseReservation: 0.2,
+    routingStrategy: 'balanced' as RoutingStrategy,
+    autoDegradation: {
+      enabled: true,
+      triggerThreshold: 0.85,
+      minQualityFloor: 0.75,
+    },
+    batchApiEnabled: false,
+    batchApiDiscount: 0.5,
+    batchApiMaxDelay: 60,
+  },
+  servingMetrics: makeInitialServingMetrics(),
+  batchApi: {
+    totalBatchDemand: 0,
+    pendingQueue: 0,
+    servedLastTick: 0,
+    revenue: 0,
  },
  openSourcedModels: [],
  subscriberHistory: [],