|
|
|
@@ -6,7 +6,7 @@ import type {
|
|
|
|
|
ModelUtilizationEntry,
|
|
|
|
|
BatchApiState,
|
|
|
|
|
} from '@ai-tycoon/shared';
|
|
|
|
|
import type { BaseModel, ModelVariant, ModelFamily, ModelsState, SizeTier } from '@ai-tycoon/shared';
|
|
|
|
|
import type { BaseModel, ModelsState, SizeTier } from '@ai-tycoon/shared';
|
|
|
|
|
import {
|
|
|
|
|
MODEL_SIZE_THROUGHPUT_SCALER,
|
|
|
|
|
MOE_SPEED_MULTIPLIER,
|
|
|
|
@@ -62,73 +62,133 @@ export interface ServingPipelineResult {
|
|
|
|
|
batchRevenue: number;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Per-model data that `buildModelFleet` stores in `cachedSlots` so it does not
 * have to be recomputed on every call.
 *
 * One entry is pushed for each deployed base model and each deployed variant
 * whose base model can be resolved. The fields mirror the serving slot that is
 * later written into `fleetOutput`, except that the slot's `throughputCapacity`
 * is replaced here by `throughputMultiplier`.
 */
interface CachedSlot {
|
|
|
|
|
modelId: string;
|
|
|
|
|
modelName: string;
|
|
|
|
|
sizeTier: SizeTier;
|
|
|
|
|
// false for entries built from a base model, true for entries built from a variant.
isVariant: boolean;
|
|
|
|
|
// null for base models and for variants without a quantization setting.
quantization: string | null;
|
|
|
|
|
// rawCapability / 100, scaled by the quantization's qualityRetention for variants.
qualityScore: number;
|
|
|
|
|
speedMultiplier: number;
|
|
|
|
|
// Multiplied by the per-model FLOPs share at fleet-build time to obtain throughputCapacity.
throughputMultiplier: number;
|
|
|
|
|
isMoE: boolean;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Module-level scratch state that is reused between calls instead of being
// reallocated. Everything declared here is cleared again by resetFleetCache().

// Value of modelsState.deploymentVersion for which cachedSlots was last built;
// -1 is the initial/reset value, used to mean "nothing cached yet".
let cachedDeploymentVersion = -1;
|
|
|
|
|
// Per-model data rebuilt by buildModelFleet whenever the deployment version changes.
let cachedSlots: CachedSlot[] = [];
|
|
|
|
|
// Array returned by buildModelFleet; its length and entries are overwritten in place on each call.
const fleetOutput: ModelServingSlot[] = [];
|
|
|
|
|
|
|
|
|
|
// modelId -> remaining / used throughput. In the visible hunks these back the
// main `fleetState` in processServingPipeline.
const mainRemaining = new Map<string, number>();
|
|
|
|
|
const mainUsed = new Map<string, number>();
|
|
|
|
|
// Same shape as above; in the visible hunks these back `enterpriseFleetState`.
const entRemaining = new Map<string, number>();
|
|
|
|
|
const entUsed = new Map<string, number>();
|
|
|
|
|
|
|
|
|
|
// Per-model utilization entries; in the visible hunks this array is filled in
// place and returned as `modelUtilization`.
let cachedUtilization: ModelUtilizationEntry[] = [];
|
|
|
|
|
|
|
|
|
|
/**
 * Clears all module-level cache state used by the serving pipeline.
 *
 * Resets the cached deployment version to -1 (so it no longer matches a
 * previously seen version), truncates the cached arrays in place and empties
 * the four modelId-keyed maps.
 *
 * NOTE(review): the arrays are truncated by setting `length = 0` rather than
 * being reassigned, so any caller still holding a reference to `fleetOutput`
 * or `cachedUtilization` will see that array become empty — confirm that is
 * intended.
 */
export function resetFleetCache(): void {
|
|
|
|
|
cachedDeploymentVersion = -1;
|
|
|
|
|
cachedSlots.length = 0;
|
|
|
|
|
fleetOutput.length = 0;
|
|
|
|
|
mainRemaining.clear();
|
|
|
|
|
mainUsed.clear();
|
|
|
|
|
entRemaining.clear();
|
|
|
|
|
entUsed.clear();
|
|
|
|
|
cachedUtilization.length = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the list of serving slots for all deployed models, splitting
 * `effectiveInferenceFlops` evenly across them.
 *
 * NOTE(review): this span appears to interleave two versions of the function
 * (the removed and added lines of a diff hunk, without +/- markers):
 * `baseModelById` and `totalDeployed` are each declared twice, two `return`
 * statements follow one another at the end, and the `|` lines are extraction
 * filler. It is not valid TypeScript as it stands and must be de-interleaved
 * before use.
 *
 * Two strategies are visible:
 *  - One collects `deployedBases` / `deployedVariants` and pushes a freshly
 *    allocated slot object per model into a local `slots` array on every call.
 *  - The other (presumably the newer one) rebuilds `cachedSlots` only when
 *    `modelsState.deploymentVersion` differs from `cachedDeploymentVersion`,
 *    then overwrites the entries of the shared `fleetOutput` array in place.
 *
 * @param modelsState - Source of `deploymentVersion`, `baseModels` and `families`.
 * @param effectiveInferenceFlops - Total FLOPs to divide between deployed models;
 *   a value <= 0 yields an empty result.
 * @returns The serving slots. In the cache-based version this is the shared
 *   module-level `fleetOutput` array, not a fresh copy.
 */
function buildModelFleet(
|
|
|
|
|
modelsState: ModelsState,
|
|
|
|
|
effectiveInferenceFlops: number,
|
|
|
|
|
): ModelServingSlot[] {
|
|
|
|
|
const slots: ModelServingSlot[] = [];
|
|
|
|
|
const version = modelsState.deploymentVersion;
|
|
|
|
|
|
|
|
|
|
const deployedBases: BaseModel[] = [];
|
|
|
|
|
const baseModelById = new Map<string, BaseModel>();
|
|
|
|
|
for (const m of modelsState.baseModels) {
|
|
|
|
|
if (m.isDeployed) deployedBases.push(m);
|
|
|
|
|
baseModelById.set(m.id, m);
|
|
|
|
|
}
|
|
|
|
|
// Cache-based version: rebuild the per-model data only when the deployment version changed.
if (version !== cachedDeploymentVersion) {
|
|
|
|
|
cachedSlots.length = 0;
|
|
|
|
|
|
|
|
|
|
|
// NOTE(review): the next lines (through the `deployedVariants.push`) look like they
// belong to the other version; their two `for` loops are never closed in this span.
const deployedVariants: { variant: ModelVariant; baseModel: BaseModel }[] = [];
|
|
|
|
|
for (const family of modelsState.families) {
|
|
|
|
|
for (const variant of family.variants) {
|
|
|
|
|
if (!variant.isDeployed) continue;
|
|
|
|
|
const base = baseModelById.get(variant.baseModelId);
|
|
|
|
|
if (base) deployedVariants.push({ variant, baseModel: base });
|
|
|
|
|
const baseModelById = new Map<string, BaseModel>();
|
|
|
|
|
// Every base model is indexed by id (deployed or not) so variants can resolve it;
// only deployed ones get a cached slot.
for (const m of modelsState.baseModels) {
|
|
|
|
|
baseModelById.set(m.id, m);
|
|
|
|
|
if (!m.isDeployed) continue;
|
|
|
|
|
const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[m.sizeTier] ?? 1.0;
|
|
|
|
|
const moeFactor = m.architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER : 1.0;
|
|
|
|
|
cachedSlots.push({
|
|
|
|
|
modelId: m.id,
|
|
|
|
|
modelName: m.name,
|
|
|
|
|
sizeTier: m.sizeTier,
|
|
|
|
|
isVariant: false,
|
|
|
|
|
quantization: null,
|
|
|
|
|
qualityScore: m.rawCapability / 100,
|
|
|
|
|
speedMultiplier: moeFactor,
|
|
|
|
|
throughputMultiplier: FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor,
|
|
|
|
|
isMoE: m.architecture.type === 'moe',
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// Deployed variants: size tier and raw capability come from the base model,
// MoE flag from the variant's own architecture, speed/quality from its quantization config.
for (const family of modelsState.families) {
|
|
|
|
|
for (const variant of family.variants) {
|
|
|
|
|
if (!variant.isDeployed) continue;
|
|
|
|
|
const base = baseModelById.get(variant.baseModelId);
|
|
|
|
|
// Variants whose base model cannot be found are skipped.
if (!base) continue;
|
|
|
|
|
const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[base.sizeTier] ?? 1.0;
|
|
|
|
|
const moeFactor = variant.architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER : 1.0;
|
|
|
|
|
const quantConfig = variant.quantization ? QUANTIZATION_CONFIGS[variant.quantization] : null;
|
|
|
|
|
const quantSpeedFactor = quantConfig?.speedMultiplier ?? 1.0;
|
|
|
|
|
const qualityRetention = quantConfig?.qualityRetention ?? 1.0;
|
|
|
|
|
cachedSlots.push({
|
|
|
|
|
modelId: variant.id,
|
|
|
|
|
modelName: variant.name,
|
|
|
|
|
sizeTier: base.sizeTier,
|
|
|
|
|
isVariant: true,
|
|
|
|
|
quantization: variant.quantization ?? null,
|
|
|
|
|
qualityScore: (base.rawCapability / 100) * qualityRetention,
|
|
|
|
|
speedMultiplier: moeFactor * quantSpeedFactor,
|
|
|
|
|
throughputMultiplier: FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor * quantSpeedFactor,
|
|
|
|
|
isMoE: variant.architecture.type === 'moe',
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// Remember which deployment version the cache now reflects.
cachedDeploymentVersion = version;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// NOTE(review): `totalDeployed` and its early-return guard appear twice below,
// once per interleaved version.
const totalDeployed = deployedBases.length + deployedVariants.length;
|
|
|
|
|
if (totalDeployed === 0 || effectiveInferenceFlops <= 0) return slots;
|
|
|
|
|
const totalDeployed = cachedSlots.length;
|
|
|
|
|
if (totalDeployed === 0 || effectiveInferenceFlops <= 0) {
|
|
|
|
|
fleetOutput.length = 0;
|
|
|
|
|
return fleetOutput;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// Every deployed model receives an equal share of the available FLOPs.
const flopsPerModel = effectiveInferenceFlops / totalDeployed;
|
|
|
|
|
|
|
|
|
|
|
for (const model of deployedBases) {
|
|
|
|
|
const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[model.sizeTier] ?? 1.0;
|
|
|
|
|
const moeFactor = model.architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER : 1.0;
|
|
|
|
|
const throughput = flopsPerModel * FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor;
|
|
|
|
|
|
|
|
|
|
|
slots.push({
|
|
|
|
|
modelId: model.id,
|
|
|
|
|
modelName: model.name,
|
|
|
|
|
sizeTier: model.sizeTier,
|
|
|
|
|
isVariant: false,
|
|
|
|
|
quantization: null,
|
|
|
|
|
qualityScore: model.rawCapability / 100,
|
|
|
|
|
speedMultiplier: moeFactor,
|
|
|
|
|
throughputCapacity: throughput,
|
|
|
|
|
isMoE: model.architecture.type === 'moe',
|
|
|
|
|
});
|
|
|
|
|
// Cache-based version: resize the shared output array, then overwrite existing
// slot objects field by field, allocating a new object only for an empty index.
fleetOutput.length = totalDeployed;
|
|
|
|
|
for (let i = 0; i < totalDeployed; i++) {
|
|
|
|
|
const cs = cachedSlots[i];
|
|
|
|
|
const existing = fleetOutput[i];
|
|
|
|
|
if (existing) {
|
|
|
|
|
existing.modelId = cs.modelId;
|
|
|
|
|
existing.modelName = cs.modelName;
|
|
|
|
|
existing.sizeTier = cs.sizeTier;
|
|
|
|
|
existing.isVariant = cs.isVariant;
|
|
|
|
|
existing.quantization = cs.quantization;
|
|
|
|
|
existing.qualityScore = cs.qualityScore;
|
|
|
|
|
existing.speedMultiplier = cs.speedMultiplier;
|
|
|
|
|
// Only this field depends on the per-call FLOPs; the others are copied from the cache.
existing.throughputCapacity = flopsPerModel * cs.throughputMultiplier;
|
|
|
|
|
existing.isMoE = cs.isMoE;
|
|
|
|
|
} else {
|
|
|
|
|
fleetOutput[i] = {
|
|
|
|
|
modelId: cs.modelId,
|
|
|
|
|
modelName: cs.modelName,
|
|
|
|
|
sizeTier: cs.sizeTier,
|
|
|
|
|
isVariant: cs.isVariant,
|
|
|
|
|
quantization: cs.quantization,
|
|
|
|
|
qualityScore: cs.qualityScore,
|
|
|
|
|
speedMultiplier: cs.speedMultiplier,
|
|
|
|
|
throughputCapacity: flopsPerModel * cs.throughputMultiplier,
|
|
|
|
|
isMoE: cs.isMoE,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
for (const { variant, baseModel } of deployedVariants) {
|
|
|
|
|
const sizeFactor = MODEL_SIZE_THROUGHPUT_SCALER[baseModel.sizeTier] ?? 1.0;
|
|
|
|
|
const moeFactor = variant.architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER : 1.0;
|
|
|
|
|
const quantConfig = variant.quantization ? QUANTIZATION_CONFIGS[variant.quantization] : null;
|
|
|
|
|
const quantSpeedFactor = quantConfig?.speedMultiplier ?? 1.0;
|
|
|
|
|
const qualityRetention = quantConfig?.qualityRetention ?? 1.0;
|
|
|
|
|
const throughput = flopsPerModel * FLOPS_TO_TOKENS_MULTIPLIER * sizeFactor * moeFactor * quantSpeedFactor;
|
|
|
|
|
|
|
|
|
|
|
slots.push({
|
|
|
|
|
modelId: variant.id,
|
|
|
|
|
modelName: variant.name,
|
|
|
|
|
sizeTier: baseModel.sizeTier,
|
|
|
|
|
isVariant: true,
|
|
|
|
|
quantization: variant.quantization ?? null,
|
|
|
|
|
qualityScore: (baseModel.rawCapability / 100) * qualityRetention,
|
|
|
|
|
speedMultiplier: moeFactor * quantSpeedFactor,
|
|
|
|
|
throughputCapacity: throughput,
|
|
|
|
|
isMoE: variant.architecture.type === 'moe',
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// NOTE(review): two consecutive returns — one per interleaved version.
return slots;
|
|
|
|
|
return fleetOutput;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function sortFleetByStrategy(
|
|
|
|
@@ -136,24 +196,23 @@ function sortFleetByStrategy(
|
|
|
|
|
strategy: string,
|
|
|
|
|
overallUtilization: number,
|
|
|
|
|
): ModelServingSlot[] {
|
|
|
|
|
const sorted = [...fleet];
|
|
|
|
|
switch (strategy) {
|
|
|
|
|
case 'quality-first':
|
|
|
|
|
sorted.sort((a, b) => b.qualityScore - a.qualityScore);
|
|
|
|
|
fleet.sort((a, b) => b.qualityScore - a.qualityScore);
|
|
|
|
|
break;
|
|
|
|
|
case 'speed-first':
|
|
|
|
|
sorted.sort((a, b) => b.throughputCapacity - a.throughputCapacity);
|
|
|
|
|
fleet.sort((a, b) => b.throughputCapacity - a.throughputCapacity);
|
|
|
|
|
break;
|
|
|
|
|
case 'balanced':
|
|
|
|
|
default:
|
|
|
|
|
if (overallUtilization > 0.8) {
|
|
|
|
|
sorted.sort((a, b) => b.throughputCapacity - a.throughputCapacity);
|
|
|
|
|
fleet.sort((a, b) => b.throughputCapacity - a.throughputCapacity);
|
|
|
|
|
} else {
|
|
|
|
|
sorted.sort((a, b) => b.qualityScore - a.qualityScore);
|
|
|
|
|
fleet.sort((a, b) => b.qualityScore - a.qualityScore);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
return sorted;
|
|
|
|
|
return fleet;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
interface FleetState {
|
|
|
|
@@ -250,7 +309,8 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
|
|
|
|
|
const { modelsState, effectiveInferenceFlops, overloadPolicy, demandByTier, batchApi, modelQuality, researchUnlocks } = input;
|
|
|
|
|
|
|
|
|
|
const fleet = buildModelFleet(modelsState, effectiveInferenceFlops);
|
|
|
|
|
const totalFleetCapacity = fleet.reduce((sum, s) => sum + s.throughputCapacity, 0);
|
|
|
|
|
let totalFleetCapacity = 0;
|
|
|
|
|
for (const s of fleet) totalFleetCapacity += s.throughputCapacity;
|
|
|
|
|
|
|
|
|
|
if (fleet.length === 0 || totalFleetCapacity <= 0) {
|
|
|
|
|
const metrics = makeInitialServingMetrics();
|
|
|
|
@@ -275,7 +335,7 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const totalDemand = Object.values(demandByTier).reduce((s, v) => s + v, 0);
|
|
|
|
|
const totalDemand = demandByTier.enterprise + demandByTier['api-paid'] + demandByTier['consumer-paid'] + demandByTier['api-free'] + demandByTier['consumer-free'];
|
|
|
|
|
const overallUtilization = totalFleetCapacity > 0 ? totalDemand / totalFleetCapacity : 0;
|
|
|
|
|
|
|
|
|
|
const effectiveStrategy = researchUnlocks.servingRoutingUnlocked
|
|
|
|
@@ -284,10 +344,13 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
|
|
|
|
|
|
|
|
|
|
const sortedFleet = sortFleetByStrategy(fleet, effectiveStrategy, overallUtilization);
|
|
|
|
|
|
|
|
|
|
const fleetState: FleetState = {
|
|
|
|
|
remaining: new Map(fleet.map(s => [s.modelId, s.throughputCapacity])),
|
|
|
|
|
used: new Map(fleet.map(s => [s.modelId, 0])),
|
|
|
|
|
};
|
|
|
|
|
mainRemaining.clear();
|
|
|
|
|
mainUsed.clear();
|
|
|
|
|
for (const s of fleet) {
|
|
|
|
|
mainRemaining.set(s.modelId, s.throughputCapacity);
|
|
|
|
|
mainUsed.set(s.modelId, 0);
|
|
|
|
|
}
|
|
|
|
|
const fleetState: FleetState = { remaining: mainRemaining, used: mainUsed };
|
|
|
|
|
|
|
|
|
|
const reservedCapacity = totalFleetCapacity * overloadPolicy.enterpriseReservation;
|
|
|
|
|
const enterpriseDemand = demandByTier['enterprise'] ?? 0;
|
|
|
|
@@ -310,10 +373,13 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
|
|
|
|
|
const nonEnterpriseTiers = effectivePriorityOrder.filter(t => t !== 'enterprise');
|
|
|
|
|
|
|
|
|
|
if (enterpriseDemand > 0) {
|
|
|
|
|
const enterpriseFleetState: FleetState = {
|
|
|
|
|
remaining: new Map(fleet.map(s => [s.modelId, s.throughputCapacity])),
|
|
|
|
|
used: new Map(fleet.map(s => [s.modelId, 0])),
|
|
|
|
|
};
|
|
|
|
|
entRemaining.clear();
|
|
|
|
|
entUsed.clear();
|
|
|
|
|
for (const s of fleet) {
|
|
|
|
|
entRemaining.set(s.modelId, s.throughputCapacity);
|
|
|
|
|
entUsed.set(s.modelId, 0);
|
|
|
|
|
}
|
|
|
|
|
const enterpriseFleetState: FleetState = { remaining: entRemaining, used: entUsed };
|
|
|
|
|
|
|
|
|
|
const reserveLimit = reservedCapacity > 0 ? reservedCapacity : totalFleetCapacity;
|
|
|
|
|
let budgetLeft = reserveLimit;
|
|
|
|
@@ -334,10 +400,10 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
for (const slot of fleet) {
|
|
|
|
|
const entUsed = enterpriseFleetState.used.get(slot.modelId) ?? 0;
|
|
|
|
|
const mainRemaining = fleetState.remaining.get(slot.modelId) ?? 0;
|
|
|
|
|
fleetState.remaining.set(slot.modelId, Math.max(0, mainRemaining - entUsed + (reservedCapacity > 0 ? reservedCapacity / fleet.length : 0)));
|
|
|
|
|
fleetState.used.set(slot.modelId, entUsed);
|
|
|
|
|
const entUsedForModel = enterpriseFleetState.used.get(slot.modelId) ?? 0;
|
|
|
|
|
const mainRemainingForModel = fleetState.remaining.get(slot.modelId) ?? 0;
|
|
|
|
|
fleetState.remaining.set(slot.modelId, Math.max(0, mainRemainingForModel - entUsedForModel + (reservedCapacity > 0 ? reservedCapacity / fleet.length : 0)));
|
|
|
|
|
fleetState.used.set(slot.modelId, entUsedForModel);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
tierResults['enterprise'] = { demandTokens: 0, servedTokens: 0, queuedTokens: 0, rejectedTokens: 0, degradedTokens: 0, avgQualityDelivered: 1 };
|
|
|
|
@@ -390,34 +456,50 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
|
|
|
|
|
updatedBatchApi.revenue = batchRevenue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const totalServed = Object.values(tierResults).reduce((s, t) => s + t.servedTokens, 0);
|
|
|
|
|
const totalQueued = Object.values(tierResults).reduce((s, t) => s + t.queuedTokens, 0);
|
|
|
|
|
const totalRejected = Object.values(tierResults).reduce((s, t) => s + t.rejectedTokens, 0);
|
|
|
|
|
const totalDegraded = Object.values(tierResults).reduce((s, t) => s + t.degradedTokens, 0);
|
|
|
|
|
|
|
|
|
|
let effectiveQuality = modelQuality;
|
|
|
|
|
if (totalServed > 0) {
|
|
|
|
|
let qualitySum = 0;
|
|
|
|
|
for (const t of Object.values(tierResults)) {
|
|
|
|
|
qualitySum += t.avgQualityDelivered * t.servedTokens;
|
|
|
|
|
}
|
|
|
|
|
effectiveQuality = qualitySum / totalServed;
|
|
|
|
|
let totalServed = 0;
|
|
|
|
|
let totalQueued = 0;
|
|
|
|
|
let totalRejected = 0;
|
|
|
|
|
let totalDegraded = 0;
|
|
|
|
|
let qualitySum = 0;
|
|
|
|
|
for (const tier of effectivePriorityOrder) {
|
|
|
|
|
const t = tierResults[tier];
|
|
|
|
|
if (!t) continue;
|
|
|
|
|
totalServed += t.servedTokens;
|
|
|
|
|
totalQueued += t.queuedTokens;
|
|
|
|
|
totalRejected += t.rejectedTokens;
|
|
|
|
|
totalDegraded += t.degradedTokens;
|
|
|
|
|
qualitySum += t.avgQualityDelivered * t.servedTokens;
|
|
|
|
|
}
|
|
|
|
|
const effectiveQuality = totalServed > 0 ? qualitySum / totalServed : modelQuality;
|
|
|
|
|
|
|
|
|
|
const queuedFraction = totalDemand > 0 ? totalQueued / totalDemand : 0;
|
|
|
|
|
const avgLatencyMs = BASE_LATENCY_MS + queuedFraction * 100 * QUEUE_LATENCY_MS_PER_PERCENT;
|
|
|
|
|
|
|
|
|
|
const modelUtilization: ModelUtilizationEntry[] = fleet.map(slot => ({
|
|
|
|
|
modelId: slot.modelId,
|
|
|
|
|
modelName: slot.modelName,
|
|
|
|
|
quantization: slot.quantization,
|
|
|
|
|
qualityScore: slot.qualityScore,
|
|
|
|
|
throughputCapacity: slot.throughputCapacity,
|
|
|
|
|
throughputUsed: fleetState.used.get(slot.modelId) ?? 0,
|
|
|
|
|
utilization: slot.throughputCapacity > 0
|
|
|
|
|
? Math.min(1, (fleetState.used.get(slot.modelId) ?? 0) / slot.throughputCapacity)
|
|
|
|
|
: 0,
|
|
|
|
|
}));
|
|
|
|
|
cachedUtilization.length = fleet.length;
|
|
|
|
|
for (let i = 0; i < fleet.length; i++) {
|
|
|
|
|
const slot = fleet[i];
|
|
|
|
|
const used = fleetState.used.get(slot.modelId) ?? 0;
|
|
|
|
|
const existing = cachedUtilization[i];
|
|
|
|
|
if (existing) {
|
|
|
|
|
existing.modelId = slot.modelId;
|
|
|
|
|
existing.modelName = slot.modelName;
|
|
|
|
|
existing.quantization = slot.quantization;
|
|
|
|
|
existing.qualityScore = slot.qualityScore;
|
|
|
|
|
existing.throughputCapacity = slot.throughputCapacity;
|
|
|
|
|
existing.throughputUsed = used;
|
|
|
|
|
existing.utilization = slot.throughputCapacity > 0 ? Math.min(1, used / slot.throughputCapacity) : 0;
|
|
|
|
|
} else {
|
|
|
|
|
cachedUtilization[i] = {
|
|
|
|
|
modelId: slot.modelId,
|
|
|
|
|
modelName: slot.modelName,
|
|
|
|
|
quantization: slot.quantization,
|
|
|
|
|
qualityScore: slot.qualityScore,
|
|
|
|
|
throughputCapacity: slot.throughputCapacity,
|
|
|
|
|
throughputUsed: used,
|
|
|
|
|
utilization: slot.throughputCapacity > 0 ? Math.min(1, used / slot.throughputCapacity) : 0,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const autoScaleBoost = researchUnlocks.autoScalingBonus;
|
|
|
|
|
if (autoScaleBoost > 0) {
|
|
|
|
@@ -443,7 +525,7 @@ export function processServingPipeline(input: ServingPipelineInput): ServingPipe
|
|
|
|
|
totalDegraded,
|
|
|
|
|
effectiveQuality,
|
|
|
|
|
avgLatencyMs,
|
|
|
|
|
modelUtilization,
|
|
|
|
|
modelUtilization: cachedUtilization,
|
|
|
|
|
batchApiTokensServed: batchTokensServed,
|
|
|
|
|
batchApiRevenue: batchRevenue,
|
|
|
|
|
},
|
|
|
|
|