Replace decorative overload policy with real serving pipeline and dedicated Serving page
CI / build-and-push (push) Successful in 28s
CI / build-and-push (push) Successful in 28s
The old overload policy had dead controls (maxQueueDepth and rateLimitPerCustomer were never read) and trivial flat penalties. This commit replaces it with a full serving pipeline in which deployed models form a fleet, requests are routed through priority/degradation logic, and policy choices create meaningful strategic tradeoffs.

New serving pipeline: fleet building from deployed models (size/quant/MoE multipliers); demand categorization into 5 priority tiers; enterprise capacity reservation; priority-ordered serving with overflow behaviors (queue/reject/degrade); auto-degradation to faster models under load; and a Batch API that fills idle capacity at discounted rates.

Four new research nodes gate these features progressively: Intelligent Request Routing, Priority Queue System, Request Batching, and Auto-Scaling.

New dedicated Serving page with pipeline metrics, model fleet utilization, and research-gated policy controls.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
import type { ApiTierState, ApiTierId, DeveloperEcosystem } from '@ai-tycoon/shared';
|
||||
import type { ApiTierState, ApiTierId, DeveloperEcosystem, TierServingMetrics } from '@ai-tycoon/shared';
|
||||
import {
|
||||
API_TIER_ORDER,
|
||||
API_CONVERSION_RATES,
|
||||
API_TIER_CHURN_RATES,
|
||||
API_TOKENS_PER_DEVELOPER_PER_TICK,
|
||||
REJECTION_CHURN_MULTIPLIER,
|
||||
} from '@ai-tycoon/shared';
|
||||
|
||||
export interface ApiTickResult {
|
||||
@@ -18,6 +19,8 @@ export function processApiTiers(
|
||||
modelQuality: number,
|
||||
seasonalApiMultiplier: number,
|
||||
ecosystem: DeveloperEcosystem,
|
||||
apiPaidMetrics: TierServingMetrics,
|
||||
apiFreeMetrics: TierServingMetrics,
|
||||
): ApiTickResult {
|
||||
const updated: ApiTierState = {
|
||||
tiers: { ...tiers.tiers },
|
||||
@@ -89,6 +92,23 @@ export function processApiTiers(
|
||||
updated.totalDevelopers = totalDevelopers;
|
||||
updated.totalTokensPerTick = totalTokens;
|
||||
|
||||
const freeRejectRate = apiFreeMetrics.demandTokens > 0
|
||||
? apiFreeMetrics.rejectedTokens / apiFreeMetrics.demandTokens : 0;
|
||||
if (freeRejectRate > 0) {
|
||||
const extraChurn = updated.tiers.free.developerCount * freeRejectRate * 0.01 * REJECTION_CHURN_MULTIPLIER;
|
||||
updated.tiers.free.developerCount = Math.max(0, updated.tiers.free.developerCount - extraChurn);
|
||||
}
|
||||
|
||||
const paidRejectRate = apiPaidMetrics.demandTokens > 0
|
||||
? apiPaidMetrics.rejectedTokens / apiPaidMetrics.demandTokens : 0;
|
||||
if (paidRejectRate > 0) {
|
||||
for (const id of API_TIER_ORDER) {
|
||||
if (id === 'free') continue;
|
||||
const extraChurn = updated.tiers[id].developerCount * paidRejectRate * 0.005 * REJECTION_CHURN_MULTIPLIER;
|
||||
updated.tiers[id].developerCount = Math.max(0, updated.tiers[id].developerCount - extraChurn);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
apiTiers: updated,
|
||||
apiRevenue: Math.max(0, apiRevenue),
|
||||
|
||||
Reference in New Issue
Block a user