Replace decorative overload policy with real serving pipeline and dedicated Serving page
CI / build-and-push (push) Successful in 28s
CI / build-and-push (push) Successful in 28s
The old overload policy had dead controls (maxQueueDepth and rateLimitPerCustomer were never read) and trivial flat penalties. This replaces it with a full serving pipeline where deployed models form a fleet, requests route through priority/degradation logic, and policy choices create meaningful strategic tradeoffs.

New serving pipeline: fleet building from deployed models (size/quant/MoE multipliers), demand categorization by 5 priority tiers, enterprise capacity reservation, priority-ordered serving with overflow behaviors (queue/reject/degrade), auto-degradation to faster models under load, and Batch API to fill idle capacity at discounted rates.

4 new research nodes gate features progressively: Intelligent Request Routing, Priority Queue System, Request Batching, and Auto-Scaling.

New dedicated Serving page with pipeline metrics, model fleet utilization, and research-gated policy controls.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -118,6 +118,34 @@ export const FLOPS_TO_TOKENS_MULTIPLIER = 26;
|
||||
|
||||
export const OVERLOAD_PENALTY_EXPONENT = 1.5;

// --- Serving Pipeline ---
//
// Tuning values for the serving pipeline. The code that reads them is not part
// of this section, so the grouping notes below follow the constant names only.
// NOTE(review): confirm units and exact usage against the serving logic.

// Satisfaction penalties, one per overflow outcome (reject / queue / degrade).
export const REJECTION_SATISFACTION_PENALTY = 0.15;
export const QUEUE_SATISFACTION_PENALTY = 0.05;
export const DEGRADATION_SATISFACTION_PENALTY = 0.08;

// Churn multipliers for rejected and queued traffic.
export const REJECTION_CHURN_MULTIPLIER = 3.0;
export const QUEUE_CHURN_MULTIPLIER = 1.5;

export const ENTERPRISE_REJECTION_SLA_MULTIPLIER = 3.0;

// Presumably the rejected share each tier tolerates before penalties apply — TODO confirm.
export const FREE_TIER_REJECTION_TOLERANCE = 0.3;
export const PAID_TIER_REJECTION_TOLERANCE = 0.05;

// Throughput scaler per model size tier; values shrink as the tier grows
// (nano 10.0 down to flagship 1.0).
export const MODEL_SIZE_THROUGHPUT_SCALER: Record<SizeTier, number> = {
  nano: 10.0,
  small: 5.0,
  medium: 2.0,
  large: 1.2,
  flagship: 1.0,
};

// Batch API demand per developer for each API tier; the free tier contributes none.
export const BATCH_API_DEMAND_PER_DEV: Record<ApiTierId, number> = {
  free: 0,
  payg: 2,
  scale: 20,
  'enterprise-api': 100,
};
export const BATCH_API_DEFAULT_DISCOUNT = 0.5;
export const BATCH_API_MAX_PENDING = 100_000;

export const BATCHING_THROUGHPUT_FACTOR = 0.15;
export const BASE_LATENCY_MS = 50;
export const QUEUE_LATENCY_MS_PER_PERCENT = 5;
|
||||
export const ERA_THRESHOLDS = {
|
||||
scaleup: { revenue: 10_000, capability: 15, reputation: 30 },
|
||||
bigtech: { revenue: 1_000_000, capability: 50, reputation: 60 },
|
||||
|
||||
@@ -52,4 +52,4 @@ export const INITIAL_SETTINGS: GameSettings = {
|
||||
musicVolume: 0.5,
|
||||
};
|
||||
|
||||
// Save version, raised from 8 to 9 alongside the new serving-pipeline state.
// NOTE(review): presumably read by save loading/migration code — confirm there.
export const SAVE_VERSION = 9;
|
||||
|
||||
@@ -170,13 +170,93 @@ export interface ObsolescenceState {
|
||||
newModelBoostRemaining: number;
|
||||
}
|
||||
|
||||
// --- Serving Pipeline & Overload Policy ---

/** Traffic classes recognised by the serving pipeline. */
export type TrafficPriority = 'enterprise' | 'api-paid' | 'consumer-paid' | 'api-free' | 'consumer-free';

/** Selectable routing strategies. */
export type RoutingStrategy = 'quality-first' | 'speed-first' | 'balanced';

/** Per-class choice applied to overflow traffic: queue it, reject it, or degrade it. */
export type OverflowBehavior = 'queue' | 'reject' | 'degrade';

/** Every TrafficPriority value, listed from 'enterprise' down to 'consumer-free'. */
export const TRAFFIC_PRIORITIES: TrafficPriority[] = [
  'enterprise',
  'api-paid',
  'consumer-paid',
  'api-free',
  'consumer-free',
];
|
||||
|
||||
/**
 * Policy settings that steer how traffic is prioritised and what happens to
 * each traffic class when it overflows.
 */
export interface OverloadPolicy {
  /** Traffic classes in the order they should be considered. */
  priorityOrder: TrafficPriority[];
  /** Overflow handling chosen for each traffic class. */
  overflowBehavior: Record<TrafficPriority, OverflowBehavior>;
  maxQueueDepth: number;
  // NOTE(review): the two flags below look like pre-pipeline settings that the
  // per-class overflowBehavior / enterpriseReservation fields supersede —
  // confirm whether anything still reads them before removing.
  degradeQualityUnderLoad: boolean;
  prioritizeEnterprise: boolean;
  /** Rate limit per traffic class; units are not visible here — TODO confirm. */
  rateLimitPerCustomer: Record<TrafficPriority, number>;
  /** Presumably a 0..1 share of capacity held for enterprise traffic — TODO confirm. */
  enterpriseReservation: number;
  routingStrategy: RoutingStrategy;
  /** Automatic degradation settings. */
  autoDegradation: {
    enabled: boolean;
    /** Presumably the load fraction at which degradation starts — TODO confirm. */
    triggerThreshold: number;
    /** Presumably the lowest quality degradation may fall to — TODO confirm. */
    minQualityFloor: number;
  };
  batchApiEnabled: boolean;
  batchApiDiscount: number;
  batchApiMaxDelay: number;
}
|
||||
|
||||
/**
 * Serving counters for a single traffic tier. All token counts start at 0 and
 * avgQualityDelivered starts at 1 in the initial metrics.
 */
export interface TierServingMetrics {
  demandTokens: number;
  servedTokens: number;
  queuedTokens: number;
  rejectedTokens: number;
  degradedTokens: number;
  /** NOTE(review): presumably a 0..1 quality ratio — confirm against the producer. */
  avgQualityDelivered: number;
}
|
||||
|
||||
/** Utilization snapshot for one model, as listed in ServingMetrics.modelUtilization. */
export interface ModelUtilizationEntry {
  modelId: string;
  modelName: string;
  /** Quantization label, or null. */
  quantization: string | null;
  qualityScore: number;
  throughputCapacity: number;
  throughputUsed: number;
  /** NOTE(review): presumably throughputUsed / throughputCapacity — confirm against the producer. */
  utilization: number;
}
|
||||
|
||||
/**
 * Serving results: a per-tier breakdown plus overall totals, model
 * utilization and Batch API figures.
 */
export interface ServingMetrics {
  /** One metrics record per traffic class. */
  tierMetrics: Record<TrafficPriority, TierServingMetrics>;
  // NOTE(review): the totals below presumably aggregate the per-tier token
  // counts — confirm against the code that fills this structure.
  totalServed: number;
  totalQueued: number;
  totalRejected: number;
  totalDegraded: number;
  effectiveQuality: number;
  avgLatencyMs: number;
  /** One entry per model; empty in the initial metrics. */
  modelUtilization: ModelUtilizationEntry[];
  batchApiTokensServed: number;
  batchApiRevenue: number;
}
|
||||
|
||||
/** Batch API state; every field starts at 0 in the initial market state. */
export interface BatchApiState {
  totalBatchDemand: number;
  pendingQueue: number;
  servedLastTick: number;
  revenue: number;
}
|
||||
|
||||
/**
 * Builds a fresh metrics record for one traffic tier: every token counter is 0
 * and avgQualityDelivered is 1.
 *
 * @returns A new object on every call, so tiers never share state.
 */
function makeEmptyTierMetrics(): TierServingMetrics {
  return {
    demandTokens: 0,
    servedTokens: 0,
    queuedTokens: 0,
    rejectedTokens: 0,
    degradedTokens: 0,
    avgQualityDelivered: 1,
  };
}

/**
 * Builds the starting ServingMetrics: an empty record for each of the five
 * traffic tiers, zeroed totals, effectiveQuality of 1, no model utilization
 * entries and no Batch API activity.
 *
 * @returns A new, independent ServingMetrics object on every call.
 */
export function makeInitialServingMetrics(): ServingMetrics {
  return {
    tierMetrics: {
      'enterprise': makeEmptyTierMetrics(),
      'api-paid': makeEmptyTierMetrics(),
      'consumer-paid': makeEmptyTierMetrics(),
      'api-free': makeEmptyTierMetrics(),
      'consumer-free': makeEmptyTierMetrics(),
    },
    totalServed: 0,
    totalQueued: 0,
    totalRejected: 0,
    totalDegraded: 0,
    effectiveQuality: 1,
    avgLatencyMs: 0,
    modelUtilization: [],
    batchApiTokensServed: 0,
    batchApiRevenue: 0,
  };
}
|
||||
|
||||
// --- Root Market State ---
|
||||
@@ -193,6 +273,8 @@ export interface MarketState {
|
||||
seasonalMultiplier: number;
|
||||
obsolescence: ObsolescenceState;
|
||||
overloadPolicy: OverloadPolicy;
|
||||
servingMetrics: ServingMetrics;
|
||||
batchApi: BatchApiState;
|
||||
openSourcedModels: string[];
|
||||
subscriberHistory: { tick: number; subscribers: number }[];
|
||||
}
|
||||
@@ -315,10 +397,39 @@ export const INITIAL_MARKET: MarketState = {
|
||||
newModelBoostRemaining: 0,
|
||||
},
|
||||
overloadPolicy: {
|
||||
priorityOrder: ['enterprise', 'api-paid', 'consumer-paid', 'api-free', 'consumer-free'],
|
||||
overflowBehavior: {
|
||||
'enterprise': 'queue' as OverflowBehavior,
|
||||
'api-paid': 'queue' as OverflowBehavior,
|
||||
'consumer-paid': 'degrade' as OverflowBehavior,
|
||||
'api-free': 'reject' as OverflowBehavior,
|
||||
'consumer-free': 'reject' as OverflowBehavior,
|
||||
},
|
||||
maxQueueDepth: 100,
|
||||
rateLimitPerCustomer: 1000,
|
||||
degradeQualityUnderLoad: false,
|
||||
prioritizeEnterprise: true,
|
||||
rateLimitPerCustomer: {
|
||||
'enterprise': 10000,
|
||||
'api-paid': 1000,
|
||||
'consumer-paid': 500,
|
||||
'api-free': 100,
|
||||
'consumer-free': 50,
|
||||
},
|
||||
enterpriseReservation: 0.2,
|
||||
routingStrategy: 'balanced' as RoutingStrategy,
|
||||
autoDegradation: {
|
||||
enabled: true,
|
||||
triggerThreshold: 0.85,
|
||||
minQualityFloor: 0.75,
|
||||
},
|
||||
batchApiEnabled: false,
|
||||
batchApiDiscount: 0.5,
|
||||
batchApiMaxDelay: 60,
|
||||
},
|
||||
servingMetrics: makeInitialServingMetrics(),
|
||||
batchApi: {
|
||||
totalBatchDemand: 0,
|
||||
pendingQueue: 0,
|
||||
servedLastTick: 0,
|
||||
revenue: 0,
|
||||
},
|
||||
openSourcedModels: [],
|
||||
subscriberHistory: [],
|
||||
|
||||
Reference in New Issue
Block a user