Replace decorative overload policy with real serving pipeline and dedicated Serving page
CI / build-and-push (push) Successful in 28s
CI / build-and-push (push) Successful in 28s
The old overload policy had dead controls (maxQueueDepth and rateLimitPerCustomer were never read) and trivial flat penalties. This commit replaces it with a full serving pipeline in which deployed models form a fleet, requests route through priority/degradation logic, and policy choices create meaningful strategic tradeoffs.

New serving pipeline: fleet building from deployed models (size/quant/MoE multipliers), demand categorization into 5 priority tiers, enterprise capacity reservation, priority-ordered serving with overflow behaviors (queue/reject/degrade), auto-degradation to faster models under load, and a Batch API that fills idle capacity at discounted rates.

Four new research nodes gate these features progressively: Intelligent Request Routing, Priority Queue System, Request Batching, and Auto-Scaling.

New dedicated Serving page with pipeline metrics, model fleet utilization, and research-gated policy controls.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,12 +1,13 @@
|
||||
import type { ConsumerTierState, ConsumerTierId } from '@ai-tycoon/shared';
|
||||
import type { ConsumerTierState, ConsumerTierId, TierServingMetrics } from '@ai-tycoon/shared';
|
||||
import {
|
||||
CONSUMER_TIER_ORDER,
|
||||
CONVERSION_RATES,
|
||||
TIER_CHURN_RATES,
|
||||
FREE_TIER_ADOPTION_RATE,
|
||||
CONSUMER_TOKENS_PER_SUBSCRIBER,
|
||||
OVERLOAD_PENALTY_EXPONENT,
|
||||
NETWORK_DEGRADATION,
|
||||
REJECTION_CHURN_MULTIPLIER,
|
||||
QUEUE_CHURN_MULTIPLIER,
|
||||
} from '@ai-tycoon/shared';
|
||||
|
||||
export interface ConsumerTickResult {
|
||||
@@ -20,9 +21,9 @@ export function processConsumerTiers(
|
||||
playerConsumerCustomers: number,
|
||||
modelQuality: number,
|
||||
seasonalConsumerMultiplier: number,
|
||||
demandCapacityRatio: number,
|
||||
networkLatencyPenalty: number,
|
||||
overloadPolicy: { degradeQualityUnderLoad: boolean; prioritizeEnterprise: boolean },
|
||||
consumerPaidMetrics: TierServingMetrics,
|
||||
consumerFreeMetrics: TierServingMetrics,
|
||||
): ConsumerTickResult {
|
||||
const updated = {
|
||||
tiers: { ...tiers.tiers },
|
||||
@@ -97,26 +98,64 @@ export function processConsumerTiers(
|
||||
|
||||
updated.totalUsers = totalUsers;
|
||||
|
||||
const paidDemand = consumerPaidMetrics.demandTokens;
|
||||
const freeDemand = consumerFreeMetrics.demandTokens;
|
||||
const totalDemand = paidDemand + freeDemand;
|
||||
|
||||
let servingPenalty = 0;
|
||||
if (totalDemand > 0) {
|
||||
const totalRejected = consumerPaidMetrics.rejectedTokens + consumerFreeMetrics.rejectedTokens;
|
||||
const totalQueued = consumerPaidMetrics.queuedTokens + consumerFreeMetrics.queuedTokens;
|
||||
const rejectedFraction = totalRejected / totalDemand;
|
||||
const queuedFraction = totalQueued / totalDemand;
|
||||
|
||||
servingPenalty = rejectedFraction * 1.5 + queuedFraction * 0.5;
|
||||
|
||||
const avgQuality = totalDemand > 0
|
||||
? (consumerPaidMetrics.avgQualityDelivered * paidDemand + consumerFreeMetrics.avgQualityDelivered * freeDemand) / totalDemand
|
||||
: modelQuality;
|
||||
const qualityGap = Math.max(0, modelQuality - avgQuality);
|
||||
servingPenalty += qualityGap * 0.8;
|
||||
|
||||
if (consumerFreeMetrics.rejectedTokens > 0 && freeDemand > 0) {
|
||||
const freeRejectRate = consumerFreeMetrics.rejectedTokens / freeDemand;
|
||||
const extraChurn = updated.tiers.free.userCount * freeRejectRate * 0.01 * REJECTION_CHURN_MULTIPLIER;
|
||||
updated.tiers.free.userCount = Math.max(0, updated.tiers.free.userCount - extraChurn);
|
||||
}
|
||||
|
||||
if (consumerPaidMetrics.rejectedTokens > 0 && paidDemand > 0) {
|
||||
const paidRejectRate = consumerPaidMetrics.rejectedTokens / paidDemand;
|
||||
for (const id of CONSUMER_TIER_ORDER) {
|
||||
if (id === 'free') continue;
|
||||
const extraChurn = updated.tiers[id].userCount * paidRejectRate * 0.005 * REJECTION_CHURN_MULTIPLIER;
|
||||
updated.tiers[id].userCount = Math.max(0, updated.tiers[id].userCount - extraChurn);
|
||||
}
|
||||
}
|
||||
|
||||
if (totalQueued > 0) {
|
||||
for (const id of CONSUMER_TIER_ORDER) {
|
||||
const extraChurn = updated.tiers[id].userCount * queuedFraction * 0.002 * QUEUE_CHURN_MULTIPLIER;
|
||||
updated.tiers[id].userCount = Math.max(0, updated.tiers[id].userCount - extraChurn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let headroomBonus = 0;
|
||||
let overloadPenalty = 0;
|
||||
if (demandCapacityRatio <= 1) {
|
||||
headroomBonus = (1 - demandCapacityRatio) * 0.2;
|
||||
if (totalDemand > 0) {
|
||||
const totalServed = consumerPaidMetrics.servedTokens + consumerFreeMetrics.servedTokens;
|
||||
const servedFraction = totalServed / totalDemand;
|
||||
if (servedFraction > 0.95) {
|
||||
headroomBonus = (servedFraction - 0.95) * 4;
|
||||
}
|
||||
} else {
|
||||
overloadPenalty = Math.min(1, Math.pow(demandCapacityRatio - 1, OVERLOAD_PENALTY_EXPONENT));
|
||||
headroomBonus = 0.1;
|
||||
}
|
||||
|
||||
const netLatencyPenalty = networkLatencyPenalty * NETWORK_DEGRADATION.satisfactionPenaltyPerLatency;
|
||||
updated.satisfaction = Math.min(1, Math.max(0,
|
||||
0.3 + modelQuality * 0.5 + headroomBonus - overloadPenalty - netLatencyPenalty,
|
||||
0.3 + modelQuality * 0.5 + headroomBonus - servingPenalty - netLatencyPenalty,
|
||||
));
|
||||
|
||||
if (overloadPolicy.degradeQualityUnderLoad && demandCapacityRatio > 0.85) {
|
||||
updated.satisfaction = Math.max(0, updated.satisfaction - 0.02);
|
||||
}
|
||||
if (overloadPolicy.prioritizeEnterprise && demandCapacityRatio > 0.9) {
|
||||
updated.satisfaction = Math.max(0, updated.satisfaction - 0.01);
|
||||
}
|
||||
|
||||
updated.viralCoefficient = modelQuality > 0.5 ? 1 + (modelQuality - 0.5) * 2 : 0;
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user