Replace decorative overload policy with real serving pipeline and dedicated Serving page
CI / build-and-push (push) Successful in 28s

The old overload policy had dead controls (maxQueueDepth, rateLimitPerCustomer never read)
and trivial flat penalties. This replaces it with a full serving pipeline where deployed
models form a fleet, requests route through priority/degradation logic, and policy choices
create meaningful strategic tradeoffs.

New serving pipeline: fleet building from deployed models (size/quant/MoE multipliers),
demand categorization by 5 priority tiers, enterprise capacity reservation, priority-ordered
serving with overflow behaviors (queue/reject/degrade), auto-degradation to faster models
under load, and Batch API to fill idle capacity at discounted rates.

Four new research nodes gate features progressively: Intelligent Request Routing, Priority
Queue System, Request Batching, and Auto-Scaling. Adds a dedicated Serving page with pipeline
metrics, model fleet utilization, and research-gated policy controls.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 12:42:09 -04:00
parent d7d77238b9
commit 901db02a6b
17 changed files with 1349 additions and 229 deletions
@@ -118,6 +118,34 @@ export const FLOPS_TO_TOKENS_MULTIPLIER = 26;
// Exponent for the overload penalty.
// NOTE(review): the formula that applies it is outside this chunk — confirm usage.
export const OVERLOAD_PENALTY_EXPONENT = 1.5;
// --- Serving Pipeline ---
// Satisfaction penalties by serving outcome. Rejection (0.15) is the harshest,
// then degradation (0.08), then queueing (0.05).
// NOTE(review): presumably scaled by the share of affected traffic — confirm against the consumer.
export const REJECTION_SATISFACTION_PENALTY = 0.15;
export const QUEUE_SATISFACTION_PENALTY = 0.05;
export const DEGRADATION_SATISFACTION_PENALTY = 0.08;
// Churn multipliers by outcome; rejection (3.0) weighs twice as much as queueing (1.5).
export const REJECTION_CHURN_MULTIPLIER = 3.0;
export const QUEUE_CHURN_MULTIPLIER = 1.5;
// Extra multiplier for rejected enterprise traffic.
// NOTE(review): "SLA" suggests a contractual penalty — confirm what quantity it multiplies.
export const ENTERPRISE_REJECTION_SLA_MULTIPLIER = 3.0;
// Rejection tolerance per customer class: free tier 0.3, paid tier 0.05.
// NOTE(review): presumably the rejected fraction tolerated before penalties apply — confirm.
export const FREE_TIER_REJECTION_TOLERANCE = 0.3;
export const PAID_TIER_REJECTION_TOLERANCE = 0.05;
// Throughput multiplier per model size tier. Values fall monotonically from
// nano (10.0) to flagship (1.0), so flagship is the 1x baseline.
// NOTE(review): presumably consumed when building the serving fleet from deployed models — confirm.
export const MODEL_SIZE_THROUGHPUT_SCALER: Record<SizeTier, number> = {
nano: 10.0, small: 5.0, medium: 2.0, large: 1.2, flagship: 1.0,
};
// Batch API demand contributed per developer, keyed by API tier.
// The free tier contributes none; demand grows with tier (2, 20, 100).
// NOTE(review): units (tokens? requests? per tick?) are not visible in this chunk — confirm.
export const BATCH_API_DEMAND_PER_DEV: Record<ApiTierId, number> = {
free: 0, payg: 2, scale: 20, 'enterprise-api': 100,
};
// Default Batch API discount.
// NOTE(review): presumably a price fraction (0.5 = 50% off) — confirm.
export const BATCH_API_DEFAULT_DISCOUNT = 0.5;
// Upper bound on pending Batch API work.
// NOTE(review): presumably caps the pending batch queue — confirm units.
export const BATCH_API_MAX_PENDING = 100_000;
// Throughput factor associated with request batching.
// NOTE(review): whether this is an additive +15% or a multiplier is not visible here — confirm.
export const BATCHING_THROUGHPUT_FACTOR = 0.15;
// Latency model parameters in milliseconds: a fixed base plus a per-percent queue term.
// NOTE(review): "percent" of what (queued share? utilization?) is not shown — confirm.
export const BASE_LATENCY_MS = 50;
export const QUEUE_LATENCY_MS_PER_PERCENT = 5;
export const ERA_THRESHOLDS = {
scaleup: { revenue: 10_000, capability: 15, reputation: 30 },
bigtech: { revenue: 1_000_000, capability: 50, reputation: 60 },
+1 -1
View File
@@ -52,4 +52,4 @@ export const INITIAL_SETTINGS: GameSettings = {
musicVolume: 0.5,
};
/**
 * Version number of the save format.
 *
 * Bumped from 8 to 9 alongside the serving-pipeline change, which adds
 * `servingMetrics` and `batchApi` to `MarketState` and reshapes `OverloadPolicy`.
 * The stale `= 8` declaration is removed: two `export const SAVE_VERSION`
 * statements in one module are a redeclaration error.
 *
 * NOTE(review): presumably save loading compares against this value to decide
 * whether a migration is needed — confirm a v8 -> v9 migration exists.
 */
export const SAVE_VERSION = 9;
+118 -7
View File
@@ -170,13 +170,93 @@ export interface ObsolescenceState {
newModelBoostRemaining: number;
}
// --- Serving Pipeline & Overload Policy ---
// The five traffic classes the serving pipeline distinguishes.
export type TrafficPriority = 'enterprise' | 'api-paid' | 'consumer-paid' | 'api-free' | 'consumer-free';
// How requests are routed across the deployed model fleet.
// NOTE(review): the exact effect of each strategy is implemented outside this chunk — confirm.
export type RoutingStrategy = 'quality-first' | 'speed-first' | 'balanced';
// What happens to a tier's demand that cannot be served: hold it, drop it, or serve it degraded.
export type OverflowBehavior = 'queue' | 'reject' | 'degrade';
// Every TrafficPriority, in the same order as the default `overloadPolicy.priorityOrder` in INITIAL_MARKET.
export const TRAFFIC_PRIORITIES: TrafficPriority[] = ['enterprise', 'api-paid', 'consumer-paid', 'api-free', 'consumer-free'];
/**
 * Player-configurable policy for how the serving pipeline handles demand.
 *
 * `rateLimitPerCustomer` was previously declared twice with conflicting types
 * (`number` and `Record<TrafficPriority, number>`), which does not compile.
 * Only the per-tier record is kept; the scalar form was a dead control.
 */
export interface OverloadPolicy {
  /** Tiers in serving order; the INITIAL_MARKET default runs 'enterprise' first to 'consumer-free' last. */
  priorityOrder: TrafficPriority[];
  /** Per-tier choice of what to do with demand that cannot be served. */
  overflowBehavior: Record<TrafficPriority, OverflowBehavior>;
  /**
   * NOTE(review): looks like a legacy control superseded by the new pipeline
   * (the commit message calls it "never read") — confirm and remove together
   * with its INITIAL_MARKET initialiser.
   */
  maxQueueDepth: number;
  /** NOTE(review): looks superseded by `autoDegradation.enabled` — confirm before removing. */
  degradeQualityUnderLoad: boolean;
  /** NOTE(review): looks superseded by `priorityOrder` / `enterpriseReservation` — confirm before removing. */
  prioritizeEnterprise: boolean;
  /** Rate limit per customer for each tier. NOTE(review): units are not visible in this chunk — confirm. */
  rateLimitPerCustomer: Record<TrafficPriority, number>;
  /** Capacity reserved for enterprise traffic. NOTE(review): presumably a 0..1 fraction (default 0.2) — confirm. */
  enterpriseReservation: number;
  /** Strategy for routing requests across deployed models. */
  routingStrategy: RoutingStrategy;
  /** Automatic degradation settings for serving under load. */
  autoDegradation: {
    /** Master switch for auto-degradation. */
    enabled: boolean;
    /** NOTE(review): presumably the load level (default 0.85) at which degradation starts — confirm. */
    triggerThreshold: number;
    /** NOTE(review): presumably the lowest quality (default 0.75) degradation may fall to — confirm. */
    minQualityFloor: number;
  };
  /** Whether the Batch API is offered. */
  batchApiEnabled: boolean;
  /** Discount applied to Batch API traffic (default 0.5). */
  batchApiDiscount: number;
  /** Maximum delay for batch work (default 60). NOTE(review): units (ticks? seconds?) not shown — confirm. */
  batchApiMaxDelay: number;
}
/**
 * Serving counters for a single traffic tier. `ServingMetrics.tierMetrics`
 * holds one of these per `TrafficPriority`, and `makeEmptyTierMetrics()`
 * produces the starting value (all token counts 0, `avgQualityDelivered` 1).
 *
 * NOTE(review): the field names suggest token counts split by serving outcome;
 * the code that fills them is outside this chunk — confirm units and period.
 */
export interface TierServingMetrics {
// Tokens requested by this tier.
demandTokens: number;
// Tokens by outcome: served, queued, rejected, degraded.
// NOTE(review): whether degraded tokens are also counted in servedTokens is not visible here — confirm.
servedTokens: number;
queuedTokens: number;
rejectedTokens: number;
degradedTokens: number;
// Average quality delivered to this tier; starts at 1.
avgQualityDelivered: number;
}
/**
 * One row of `ServingMetrics.modelUtilization`, describing a single model in
 * the serving fleet and how much of its throughput is in use.
 */
export interface ModelUtilizationEntry {
modelId: string;
modelName: string;
// Quantization label, or null. NOTE(review): presumably null means unquantized — confirm.
quantization: string | null;
qualityScore: number;
// Available versus consumed throughput. NOTE(review): units are not visible in this chunk — confirm.
throughputCapacity: number;
throughputUsed: number;
// NOTE(review): presumably throughputUsed / throughputCapacity — confirm range and zero-capacity handling.
utilization: number;
}
/**
 * Serving-pipeline results stored on `MarketState.servingMetrics`.
 * `makeInitialServingMetrics()` builds the starting value: zero totals,
 * `effectiveQuality` 1, an empty `modelUtilization` list, and one empty
 * `TierServingMetrics` per traffic tier.
 */
export interface ServingMetrics {
// Per-tier breakdown, keyed by every TrafficPriority.
tierMetrics: Record<TrafficPriority, TierServingMetrics>;
// NOTE(review): presumably the per-tier counters summed across tiers — confirm.
totalServed: number;
totalQueued: number;
totalRejected: number;
totalDegraded: number;
// Overall delivered quality; starts at 1.
effectiveQuality: number;
// Average latency in milliseconds; starts at 0.
avgLatencyMs: number;
// One entry per model in the serving fleet.
modelUtilization: ModelUtilizationEntry[];
// Batch API results. NOTE(review): relation to BatchApiState.servedLastTick / revenue is not visible here — confirm.
batchApiTokensServed: number;
batchApiRevenue: number;
}
/**
 * Batch API bookkeeping stored on `MarketState.batchApi`.
 * Every field starts at 0 in INITIAL_MARKET.
 */
export interface BatchApiState {
// NOTE(review): presumably the current aggregate batch demand — confirm units.
totalBatchDemand: number;
// Batch work waiting to be served.
pendingQueue: number;
// Batch work served during the most recent tick.
servedLastTick: number;
// NOTE(review): whether this is per-tick or cumulative revenue is not visible here — confirm.
revenue: number;
}
/**
 * Creates a fresh per-tier metrics record: every token counter is 0 and
 * `avgQualityDelivered` is 1. A new object is returned on each call, so
 * tiers never share state.
 */
function makeEmptyTierMetrics(): TierServingMetrics {
  const blank: TierServingMetrics = {
    demandTokens: 0,
    servedTokens: 0,
    queuedTokens: 0,
    rejectedTokens: 0,
    degradedTokens: 0,
    avgQualityDelivered: 1,
  };
  return blank;
}

/**
 * Builds the starting `ServingMetrics` value: an independent empty record for
 * each of the five traffic tiers, zeroed totals, `effectiveQuality` of 1, no
 * latency, an empty model-utilization list, and no Batch API activity.
 *
 * @returns A newly allocated metrics object; nothing is shared between calls.
 */
export function makeInitialServingMetrics(): ServingMetrics {
  // Each tier gets its own record so later mutation of one cannot leak into another.
  const perTier = {
    'enterprise': makeEmptyTierMetrics(),
    'api-paid': makeEmptyTierMetrics(),
    'consumer-paid': makeEmptyTierMetrics(),
    'api-free': makeEmptyTierMetrics(),
    'consumer-free': makeEmptyTierMetrics(),
  };

  const initial: ServingMetrics = {
    tierMetrics: perTier,
    totalServed: 0,
    totalQueued: 0,
    totalRejected: 0,
    totalDegraded: 0,
    effectiveQuality: 1,
    avgLatencyMs: 0,
    modelUtilization: [],
    batchApiTokensServed: 0,
    batchApiRevenue: 0,
  };
  return initial;
}
// --- Root Market State ---
@@ -193,6 +273,8 @@ export interface MarketState {
seasonalMultiplier: number;
obsolescence: ObsolescenceState;
overloadPolicy: OverloadPolicy;
servingMetrics: ServingMetrics;
batchApi: BatchApiState;
openSourcedModels: string[];
subscriberHistory: { tick: number; subscribers: number }[];
}
@@ -315,10 +397,39 @@ export const INITIAL_MARKET: MarketState = {
newModelBoostRemaining: 0,
},
overloadPolicy: {
priorityOrder: ['enterprise', 'api-paid', 'consumer-paid', 'api-free', 'consumer-free'],
overflowBehavior: {
'enterprise': 'queue' as OverflowBehavior,
'api-paid': 'queue' as OverflowBehavior,
'consumer-paid': 'degrade' as OverflowBehavior,
'api-free': 'reject' as OverflowBehavior,
'consumer-free': 'reject' as OverflowBehavior,
},
maxQueueDepth: 100,
rateLimitPerCustomer: 1000,
degradeQualityUnderLoad: false,
prioritizeEnterprise: true,
rateLimitPerCustomer: {
'enterprise': 10000,
'api-paid': 1000,
'consumer-paid': 500,
'api-free': 100,
'consumer-free': 50,
},
enterpriseReservation: 0.2,
routingStrategy: 'balanced' as RoutingStrategy,
autoDegradation: {
enabled: true,
triggerThreshold: 0.85,
minQualityFloor: 0.75,
},
batchApiEnabled: false,
batchApiDiscount: 0.5,
batchApiMaxDelay: 60,
},
servingMetrics: makeInitialServingMetrics(),
batchApi: {
totalBatchDemand: 0,
pendingQueue: 0,
servedLastTick: 0,
revenue: 0,
},
openSourcedModels: [],
subscriberHistory: [],