Files
AIHostingTycoon/apps/web/src/pages/ServingPage.tsx
T
josh 901db02a6b
CI / build-and-push (push) Successful in 28s
Replace decorative overload policy with real serving pipeline and dedicated Serving page
The old overload policy had dead controls (maxQueueDepth, rateLimitPerCustomer never read)
and trivial flat penalties. This replaces it with a full serving pipeline where deployed
models form a fleet, requests route through priority/degradation logic, and policy choices
create meaningful strategic tradeoffs.

New serving pipeline: fleet building from deployed models (size/quant/MoE multipliers),
demand categorization by 5 priority tiers, enterprise capacity reservation, priority-ordered
serving with overflow behaviors (queue/reject/degrade), auto-degradation to faster models
under load, and Batch API to fill idle capacity at discounted rates.

4 new research nodes gate features progressively: Intelligent Request Routing, Priority
Queue System, Request Batching, and Auto-Scaling. New dedicated Serving page with pipeline
metrics, model fleet utilization, and research-gated policy controls.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-25 12:42:09 -04:00

485 lines
20 KiB
TypeScript

import { useGameStore } from '@/store';
import {
formatNumber, formatPercent,
type TrafficPriority, type OverflowBehavior, type RoutingStrategy,
TRAFFIC_PRIORITIES,
} from '@ai-tycoon/shared';
import {
Activity, Shield, Clock, CheckCircle, XCircle, Layers,
AlertTriangle, Zap, Server, ArrowRight,
} from 'lucide-react';
/** Tailwind text-colour class applied to each traffic tier's label. */
const TIER_COLORS: Record<TrafficPriority, string> = {
  enterprise: 'text-purple-400',
  'api-paid': 'text-blue-400',
  'consumer-paid': 'text-green-400',
  'api-free': 'text-yellow-400',
  'consumer-free': 'text-surface-400',
};

/** Tailwind translucent background class paired with each traffic tier. */
const TIER_BG: Record<TrafficPriority, string> = {
  enterprise: 'bg-purple-500/20',
  'api-paid': 'bg-blue-500/20',
  'consumer-paid': 'bg-green-500/20',
  'api-free': 'bg-yellow-500/20',
  'consumer-free': 'bg-surface-500/20',
};

/** Human-readable display name for each traffic tier. */
const TIER_LABELS: Record<TrafficPriority, string> = {
  enterprise: 'Enterprise',
  'api-paid': 'API Paid',
  'consumer-paid': 'Consumer Paid',
  'api-free': 'API Free',
  'consumer-free': 'Consumer Free',
};

/** Choices offered in the per-tier overflow behaviour dropdown. */
const OVERFLOW_OPTIONS: Array<{ value: OverflowBehavior; label: string }> = [
  { value: 'queue', label: 'Queue' },
  { value: 'reject', label: 'Reject' },
  { value: 'degrade', label: 'Degrade' },
];

/** Choices offered by the routing-strategy selector, each with a one-line description. */
const ROUTING_OPTIONS: Array<{ value: RoutingStrategy; label: string; desc: string }> = [
  {
    value: 'quality-first',
    label: 'Quality First',
    desc: 'Best model first — maximizes quality',
  },
  {
    value: 'balanced',
    label: 'Balanced',
    desc: 'Adapts to load — quality when idle, speed when busy',
  },
  {
    value: 'speed-first',
    label: 'Speed First',
    desc: 'Fastest model first — maximizes throughput',
  },
];
/**
 * Compact stat tile: an icon with an uppercase label, a large monospace
 * value, and an optional secondary line underneath.
 *
 * @param icon  Icon component rendered next to the label.
 * @param label Short caption shown in uppercase.
 * @param value Pre-formatted headline value.
 * @param sub   Optional secondary text; omitted from the DOM when falsy.
 * @param color CSS class applied to the icon.
 */
function MetricCard(props: {
  icon: typeof Activity; label: string; value: string; sub?: string; color: string;
}) {
  const { icon: Glyph, label, value, sub, color } = props;

  const header = (
    <div className="flex items-center gap-2 mb-2">
      <Glyph size={16} className={color} />
      <span className="text-xs text-surface-400 uppercase">{label}</span>
    </div>
  );
  // Only emit the secondary line when there is text to show.
  const footnote = sub ? <div className="text-xs text-surface-400 mt-1">{sub}</div> : null;

  return (
    <div className="bg-surface-900 border border-surface-700 rounded-xl p-4">
      {header}
      <div className="text-2xl font-bold font-mono">{value}</div>
      {footnote}
    </div>
  );
}
/**
 * Table of per-tier pipeline metrics read from `market.servingMetrics.tierMetrics`.
 *
 * One row is rendered for every entry of `TRAFFIC_PRIORITIES`. A tier with no
 * metrics entry, or with zero demand, is shown as an idle row whose numeric
 * cells all contain an em-dash placeholder — the same placeholder active rows
 * use for zero queued/rejected/degraded counts, so the table never shows
 * blank cells.
 */
function PipelineFlow() {
  const sm = useGameStore(s => s.market.servingMetrics);
  const tiers = sm.tierMetrics;

  // Placeholder for "nothing to report" in a numeric cell.
  const EMPTY = '—';
  const tokensOrDash = (tokens: number) => (tokens > 0 ? formatNumber(tokens) : EMPTY);
  // Right-aligned numeric columns, in display order (the "Tier" column is separate).
  const numericColumns = ['Demand', 'Served', 'Queued', 'Rejected', 'Degraded', 'Quality'];

  return (
    <div className="bg-surface-900 border border-surface-700 rounded-xl p-4">
      <h3 className="font-semibold flex items-center gap-2 mb-4">
        <ArrowRight size={16} />
        Request Pipeline
      </h3>
      <div className="overflow-x-auto">
        <table className="w-full text-sm">
          <thead>
            <tr className="text-xs text-surface-400 uppercase">
              <th className="text-left py-2 px-2">Tier</th>
              {numericColumns.map(col => (
                <th key={col} className="text-right py-2 px-2">{col}</th>
              ))}
            </tr>
          </thead>
          <tbody>
            {TRAFFIC_PRIORITIES.map(tier => {
              const m = tiers[tier];
              const tierCell = (
                <td className={`py-2 px-2 font-medium ${TIER_COLORS[tier]}`}>{TIER_LABELS[tier]}</td>
              );

              // Idle tier: keep the row so the table shape is stable, but fill
              // every numeric cell with the placeholder instead of leaving it blank.
              if (!m || m.demandTokens === 0) {
                return (
                  <tr key={tier} className="border-t border-surface-800">
                    {tierCell}
                    {numericColumns.map(col => (
                      <td key={col} className="text-right py-2 px-2 text-surface-500">{EMPTY}</td>
                    ))}
                  </tr>
                );
              }

              return (
                <tr key={tier} className="border-t border-surface-800">
                  {tierCell}
                  <td className="text-right py-2 px-2 font-mono">{formatNumber(m.demandTokens)}</td>
                  <td className="text-right py-2 px-2 font-mono text-green-400">{formatNumber(m.servedTokens)}</td>
                  <td className="text-right py-2 px-2 font-mono text-yellow-400">{tokensOrDash(m.queuedTokens)}</td>
                  <td className="text-right py-2 px-2 font-mono text-red-400">{tokensOrDash(m.rejectedTokens)}</td>
                  <td className="text-right py-2 px-2 font-mono text-orange-400">{tokensOrDash(m.degradedTokens)}</td>
                  <td className="text-right py-2 px-2 font-mono">{formatPercent(m.avgQualityDelivered)}</td>
                </tr>
              );
            })}
          </tbody>
        </table>
      </div>
    </div>
  );
}
/**
 * Panel listing every entry of `market.servingMetrics.modelUtilization` with a
 * utilization bar, or an empty-state hint when the list is empty.
 */
function ModelFleetPanel() {
  const fleet = useGameStore(s => s.market.servingMetrics.modelUtilization);

  // Bar colour by load: red above 90%, yellow above 70%, green otherwise.
  const barTone = (load: number): string => {
    if (load > 0.9) return 'bg-red-500';
    if (load > 0.7) return 'bg-yellow-500';
    return 'bg-green-500';
  };

  const body = fleet.length === 0 ? (
    <p className="text-sm text-surface-500">No models deployed. Train and deploy models to start serving requests.</p>
  ) : (
    <div className="space-y-2">
      {fleet.map(entry => {
        // The bar is capped at 100% width even when utilization exceeds 1.
        const barWidth = `${Math.min(100, entry.utilization * 100)}%`;
        return (
          <div key={entry.modelId} className="flex items-center gap-3">
            <div className="w-40 truncate text-sm">
              <span className="font-medium">{entry.modelName}</span>
              {entry.quantization && (
                <span className="text-xs text-surface-400 ml-1">({entry.quantization.toUpperCase()})</span>
              )}
            </div>
            <div className="flex-1">
              <div className="h-3 bg-surface-800 rounded-full overflow-hidden">
                <div
                  className={`h-full rounded-full transition-all ${barTone(entry.utilization)}`}
                  style={{ width: barWidth }}
                />
              </div>
            </div>
            <div className="w-12 text-right text-xs font-mono">{formatPercent(entry.utilization)}</div>
            <div className="w-16 text-right text-xs text-surface-400">Q:{(entry.qualityScore * 100).toFixed(0)}</div>
            <div className="w-20 text-right text-xs text-surface-400">{formatNumber(entry.throughputCapacity)} t/s</div>
          </div>
        );
      })}
    </div>
  );

  return (
    <div className="bg-surface-900 border border-surface-700 rounded-xl p-4">
      <h3 className="font-semibold flex items-center gap-2 mb-3">
        <Server size={16} />
        Model Fleet
      </h3>
      {body}
    </div>
  );
}
/** Inline notice rendered in place of a control group whose research is not completed. */
function ResearchLockNotice({ children }: { children: string }) {
  return (
    <div className="flex items-center gap-2 text-xs text-surface-500 bg-surface-800 rounded-lg p-3">
      <AlertTriangle size={14} />
      {children}
    </div>
  );
}

/**
 * Editor for `market.overloadPolicy`. Every change is pushed through the
 * store's `setOverloadPolicy` with a partial policy object.
 *
 * Visibility rules, driven by `research.completedResearch`:
 * - Enterprise reservation and the auto-degradation toggle are always shown.
 * - Auto-degradation threshold/floor sliders need 'auto-scaling' and the toggle on.
 * - Routing strategy and per-tier rate limits need 'request-routing'.
 * - Overflow behaviour and max queue depth need 'priority-queues'.
 * - Batch API controls need 'request-batching'.
 * - The lock notices for priority queues and batching are only shown once
 *   'request-routing' is completed; before that only the routing notice appears.
 */
function PolicyControls() {
  const policy = useGameStore(s => s.market.overloadPolicy);
  const setPolicy = useGameStore(s => s.setOverloadPolicy);
  // The empty-array fallback is applied OUTSIDE the selector: a selector that
  // returns a fresh `[]` on every call never compares equal to its previous
  // result, which forces needless re-renders (or a snapshot loop) while
  // `research` is absent.
  const completedResearch = useGameStore(s => s.research?.completedResearch) ?? [];
  const hasRouting = completedResearch.includes('request-routing');
  const hasPriorityQueues = completedResearch.includes('priority-queues');
  const hasBatching = completedResearch.includes('request-batching');
  const hasAutoScaling = completedResearch.includes('auto-scaling');

  return (
    <div className="bg-surface-900 border border-surface-700 rounded-xl p-4 space-y-4">
      <h3 className="font-semibold flex items-center gap-2">
        <Layers size={16} />
        Policy Controls
      </h3>

      {/* Always available: Enterprise Reservation (stored as a 0–0.5 fraction, edited as a percentage) */}
      <div>
        <label className="block text-xs text-surface-400 mb-1">Enterprise Capacity Reservation</label>
        <div className="flex items-center gap-3">
          <input
            type="range"
            min={0} max={50} step={5}
            value={policy.enterpriseReservation * 100}
            onChange={e => setPolicy({ enterpriseReservation: Number(e.target.value) / 100 })}
            className="flex-1 accent-accent"
          />
          <span className="w-12 text-right font-mono text-sm">{(policy.enterpriseReservation * 100).toFixed(0)}%</span>
        </div>
        <p className="text-[10px] text-surface-500 mt-0.5">Reserve capacity for enterprise SLAs — protects contracts but limits other tiers</p>
      </div>

      {/* Always available: Auto-Degradation toggle */}
      <div>
        <label className="flex items-center gap-2 text-sm cursor-pointer">
          <input
            type="checkbox"
            checked={policy.autoDegradation.enabled}
            onChange={e => setPolicy({
              autoDegradation: { ...policy.autoDegradation, enabled: e.target.checked },
            })}
            className="accent-accent"
          />
          <span className="text-surface-300">Auto-Degradation</span>
          <span className="text-[10px] text-surface-500">Fall back to faster models under load</span>
        </label>
        {/* Fine-tuning sliders are gated behind the auto-scaling research */}
        {hasAutoScaling && policy.autoDegradation.enabled && (
          <div className="mt-2 ml-6 space-y-2">
            <div>
              <label className="block text-xs text-surface-400 mb-1">Trigger Threshold</label>
              <div className="flex items-center gap-3">
                <input
                  type="range"
                  min={70} max={95} step={5}
                  value={policy.autoDegradation.triggerThreshold * 100}
                  onChange={e => setPolicy({
                    autoDegradation: { ...policy.autoDegradation, triggerThreshold: Number(e.target.value) / 100 },
                  })}
                  className="flex-1 accent-accent"
                />
                <span className="w-12 text-right font-mono text-sm">{(policy.autoDegradation.triggerThreshold * 100).toFixed(0)}%</span>
              </div>
            </div>
            <div>
              <label className="block text-xs text-surface-400 mb-1">Minimum Quality Floor</label>
              <div className="flex items-center gap-3">
                <input
                  type="range"
                  min={50} max={100} step={5}
                  value={policy.autoDegradation.minQualityFloor * 100}
                  onChange={e => setPolicy({
                    autoDegradation: { ...policy.autoDegradation, minQualityFloor: Number(e.target.value) / 100 },
                  })}
                  className="flex-1 accent-accent"
                />
                <span className="w-12 text-right font-mono text-sm">{(policy.autoDegradation.minQualityFloor * 100).toFixed(0)}%</span>
              </div>
            </div>
          </div>
        )}
      </div>

      {/* Routing Strategy — requires research */}
      {hasRouting ? (
        <div>
          <label className="block text-xs text-surface-400 mb-2">Routing Strategy</label>
          <div className="grid grid-cols-3 gap-2">
            {ROUTING_OPTIONS.map(opt => (
              <button
                key={opt.value}
                onClick={() => setPolicy({ routingStrategy: opt.value })}
                className={`p-2 rounded-lg border text-sm text-left transition-colors ${
                  policy.routingStrategy === opt.value
                    ? 'border-accent bg-accent/10 text-accent-light'
                    : 'border-surface-700 text-surface-300 hover:border-surface-600'
                }`}
              >
                <div className="font-medium">{opt.label}</div>
                <div className="text-[10px] text-surface-500 mt-0.5">{opt.desc}</div>
              </button>
            ))}
          </div>
        </div>
      ) : (
        <ResearchLockNotice>Research "Intelligent Request Routing" to unlock routing strategies and per-tier rate limits</ResearchLockNotice>
      )}

      {/* Priority & Overflow — requires research */}
      {hasPriorityQueues ? (
        <div>
          <label className="block text-xs text-surface-400 mb-2">Per-Tier Overflow Behavior</label>
          <div className="space-y-1.5">
            {TRAFFIC_PRIORITIES.map(tier => (
              <div key={tier} className="flex items-center gap-3">
                <span className={`w-32 text-sm ${TIER_COLORS[tier]}`}>{TIER_LABELS[tier]}</span>
                <select
                  value={policy.overflowBehavior[tier]}
                  onChange={e => setPolicy({
                    overflowBehavior: {
                      ...policy.overflowBehavior,
                      // The <select> only offers OVERFLOW_OPTIONS values, so the cast is safe.
                      [tier]: e.target.value as OverflowBehavior,
                    },
                  })}
                  className="bg-surface-800 border border-surface-600 rounded px-2 py-1 text-sm"
                >
                  {OVERFLOW_OPTIONS.map(opt => (
                    <option key={opt.value} value={opt.value}>{opt.label}</option>
                  ))}
                </select>
              </div>
            ))}
          </div>
          <div className="mt-3">
            <label className="block text-xs text-surface-400 mb-1">Max Queue Depth</label>
            <div className="flex items-center gap-3">
              <input
                type="range"
                min={10} max={500} step={10}
                value={policy.maxQueueDepth}
                onChange={e => setPolicy({ maxQueueDepth: Number(e.target.value) })}
                className="flex-1 accent-accent"
              />
              <span className="w-16 text-right font-mono text-sm">{policy.maxQueueDepth}</span>
            </div>
          </div>
        </div>
      ) : hasRouting ? (
        <ResearchLockNotice>Research "Priority Queue System" to unlock per-tier overflow behavior and queue controls</ResearchLockNotice>
      ) : null}

      {/* Batch API — requires research */}
      {hasBatching ? (
        <div>
          <label className="flex items-center gap-2 text-sm cursor-pointer">
            <input
              type="checkbox"
              checked={policy.batchApiEnabled}
              onChange={e => setPolicy({ batchApiEnabled: e.target.checked })}
              className="accent-accent"
            />
            <span className="text-surface-300">Batch API</span>
            <span className="text-[10px] text-surface-500">Fill idle capacity with discounted batch requests</span>
          </label>
          {policy.batchApiEnabled && (
            <div className="mt-2 ml-6">
              <label className="block text-xs text-surface-400 mb-1">Batch Discount</label>
              <div className="flex items-center gap-3">
                <input
                  type="range"
                  min={30} max={70} step={5}
                  value={policy.batchApiDiscount * 100}
                  onChange={e => setPolicy({ batchApiDiscount: Number(e.target.value) / 100 })}
                  className="flex-1 accent-accent"
                />
                <span className="w-12 text-right font-mono text-sm">{(policy.batchApiDiscount * 100).toFixed(0)}%</span>
              </div>
              <p className="text-[10px] text-surface-500 mt-0.5">Higher discount = more batch demand, lower per-token revenue</p>
            </div>
          )}
        </div>
      ) : hasRouting ? (
        <ResearchLockNotice>Research "Request Batching" to unlock the Batch API product line</ResearchLockNotice>
      ) : null}

      {/* Rate limits — requires routing research */}
      {hasRouting && (
        <div>
          <label className="block text-xs text-surface-400 mb-2">Per-Tier Rate Limits (tok/s per customer)</label>
          <div className="space-y-1.5">
            {TRAFFIC_PRIORITIES.map(tier => (
              <div key={tier} className="flex items-center gap-3">
                <span className={`w-32 text-sm ${TIER_COLORS[tier]}`}>{TIER_LABELS[tier]}</span>
                <input
                  type="number"
                  value={policy.rateLimitPerCustomer[tier]}
                  onChange={e => {
                    const v = Number(e.target.value);
                    // Values below the minimum of 10 (including an emptied field,
                    // which parses to 0) are ignored, so the stored limit is never
                    // invalid. NOTE(review): because the input is controlled, such
                    // keystrokes are visually discarded — confirm this is the intended UX.
                    if (v >= 10) {
                      setPolicy({
                        rateLimitPerCustomer: {
                          ...policy.rateLimitPerCustomer,
                          [tier]: v,
                        },
                      });
                    }
                  }}
                  className="w-28 bg-surface-800 border border-surface-600 rounded px-2 py-1 text-sm font-mono"
                  min={10}
                  step={100}
                />
              </div>
            ))}
          </div>
        </div>
      )}
    </div>
  );
}
/**
 * Summary of Batch API activity: pending queue, tokens served last tick, and
 * batch revenue per tick.
 *
 * Renders nothing unless 'request-batching' is in `research.completedResearch`
 * and `overloadPolicy.batchApiEnabled` is true.
 */
function BatchApiPanel() {
  const batch = useGameStore(s => s.market.batchApi);
  // Select primitives rather than whole objects or a `?? []` fallback array:
  // a selector that builds a new array on every call never compares equal to
  // its previous result, and primitive results keep this panel from
  // re-rendering on unrelated metric or policy changes.
  const batchApiRevenue = useGameStore(s => s.market.servingMetrics.batchApiRevenue);
  const batchApiEnabled = useGameStore(s => s.market.overloadPolicy.batchApiEnabled);
  const hasBatching = useGameStore(
    s => s.research?.completedResearch.includes('request-batching') ?? false,
  );

  // All hooks run above; bailing out here keeps hook order stable across renders.
  if (!hasBatching || !batchApiEnabled) return null;

  return (
    <div className="bg-surface-900 border border-surface-700 rounded-xl p-4">
      <h3 className="font-semibold flex items-center gap-2 mb-3">
        <Zap size={16} className="text-blue-400" />
        Batch API
      </h3>
      <div className="grid grid-cols-3 gap-4 text-sm">
        <div>
          <div className="text-xs text-surface-400">Pending Queue</div>
          <div className="font-mono">{formatNumber(batch.pendingQueue)} tok</div>
        </div>
        <div>
          <div className="text-xs text-surface-400">Served Last Tick</div>
          <div className="font-mono text-green-400">{formatNumber(batch.servedLastTick)} tok</div>
        </div>
        <div>
          <div className="text-xs text-surface-400">Revenue</div>
          <div className="font-mono text-accent">${batchApiRevenue.toFixed(4)}/tick</div>
        </div>
      </div>
    </div>
  );
}
/**
 * Top-level Serving page: four headline metric cards, the per-tier pipeline
 * table, the Batch API panel, and a two-column row with policy controls and
 * the model fleet.
 */
export function ServingPage() {
  const metrics = useGameStore(s => s.market.servingMetrics);
  const compute = useGameStore(s => s.compute);

  // Success rate = served / (served + queued + rejected); 100% when there was no traffic.
  const handledTotal = metrics.totalServed + metrics.totalQueued + metrics.totalRejected;
  let successRate = 1;
  if (handledTotal > 0) {
    successRate = metrics.totalServed / handledTotal;
  }

  const anyRejected = metrics.totalRejected > 0;
  const anyQueued = metrics.totalQueued > 0;

  const throughputValue =
    `${formatNumber(compute.tokensPerSecondDemand)} / ${formatNumber(compute.tokensPerSecondCapacity)}`;
  const throughputSub = `${formatPercent(compute.inferenceUtilization)} utilization`;
  const latencySub = anyQueued ? `${formatNumber(metrics.totalQueued)} queued` : 'No queuing';
  const successSub = anyRejected
    ? `${formatNumber(metrics.totalRejected)} rejected`
    : 'All requests served';

  return (
    <div className="space-y-4">
      <h2 className="text-2xl font-bold">Serving Pipeline</h2>

      {/* Headline metrics */}
      <div className="grid grid-cols-4 gap-4">
        <MetricCard
          icon={Activity}
          label="Throughput"
          value={throughputValue}
          sub={throughputSub}
          color="text-blue-400"
        />
        <MetricCard
          icon={Shield}
          label="Effective Quality"
          value={formatPercent(metrics.effectiveQuality)}
          sub="Weighted avg quality delivered"
          color="text-green-400"
        />
        <MetricCard
          icon={Clock}
          label="Avg Latency"
          value={`${metrics.avgLatencyMs.toFixed(0)}ms`}
          sub={latencySub}
          color="text-yellow-400"
        />
        {/* Icon and colour flip to the failure variant as soon as anything is rejected */}
        <MetricCard
          icon={anyRejected ? XCircle : CheckCircle}
          label="Success Rate"
          value={formatPercent(successRate)}
          sub={successSub}
          color={anyRejected ? 'text-red-400' : 'text-green-400'}
        />
      </div>

      {/* Per-tier pipeline table */}
      <PipelineFlow />

      {/* Batch API summary (renders nothing when unavailable) */}
      <BatchApiPanel />

      {/* Bottom row: policy editor beside the fleet overview */}
      <div className="grid grid-cols-2 gap-4">
        <PolicyControls />
        <ModelFleetPanel />
      </div>
    </div>
  );
}