Remove benchmark evaluation system, use training capabilities directly
Model quality for market segments and product lines now derives from the deployed model's capabilities (coding, reasoning, agents, etc.) instead of requiring a separate manual benchmark evaluation step. This eliminates an unbounded benchmarkResults[] array that was scanned five times per tick and removes ~480 lines of dead-weight UI, types, and engine code.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
import type { GameState, MarketState, BenchmarkResult } from '@ai-tycoon/shared';
|
||||
import type { GameState, MarketState, ModelCapabilities } from '@ai-tycoon/shared';
|
||||
import { CONSUMER_TOKENS_PER_SUBSCRIBER, API_TOKENS_PER_DEVELOPER_PER_TICK, BATCH_API_DEMAND_PER_DEV, makeInitialServingMetrics } from '@ai-tycoon/shared';
|
||||
import type { TrafficPriority, TierServingMetrics } from '@ai-tycoon/shared';
|
||||
import { BENCHMARKS } from '../../data/benchmarks';
|
||||
import { computeSeasonal } from './seasonalSystem';
|
||||
import { updateObsolescence } from './obsolescenceSystem';
|
||||
import { buildPlayerProfile, buildCompetitorProfile, computeMarketShares, updateTAMGrowth } from './tamSystem';
|
||||
@@ -21,31 +20,30 @@ export interface MarketTickResult {
|
||||
totalTokenDemand: number;
|
||||
}
|
||||
|
||||
/**
 * Per-segment weights for blending a model's capability scores into a single
 * quality figure for that market segment.
 *
 * Outer keys are segment names (consumer, enterprise, developer, research).
 * Each inner map assigns a weight to a subset of ModelCapabilities fields, and
 * the weights listed for every segment add up to 1.0. Capabilities that are
 * not listed for a segment are simply absent from its map.
 */
const SEGMENT_CAPABILITY_WEIGHTS: Record<string, Partial<Record<keyof ModelCapabilities, number>>> = {
|
||||
consumer: { creative: 0.35, knowledge: 0.25, reasoning: 0.15, multimodal: 0.15, coding: 0.05, agents: 0.05 },
|
||||
enterprise: { reasoning: 0.25, coding: 0.20, agents: 0.20, knowledge: 0.15, math: 0.10, multimodal: 0.10 },
|
||||
developer: { coding: 0.35, reasoning: 0.20, agents: 0.20, math: 0.15, knowledge: 0.10 },
|
||||
research: { reasoning: 0.30, math: 0.30, knowledge: 0.20, coding: 0.10, agents: 0.10 },
|
||||
};
|
||||
|
||||
/**
 * Computes a weighted quality score for one market segment; every score that
 * feeds the result is divided by 100.
 *
 * NOTE(review): this listing is a diff rendering without +/- markers, so two
 * versions of the body appear interleaved below. Judging by the commit
 * message, the lines that use `benchmarkResults`, `bestByBenchmark` and
 * `BENCHMARKS` look like the removed side, and the lines that use
 * `capabilities` and `SEGMENT_CAPABILITY_WEIGHTS` look like the added side;
 * confirm against the real file.
 *
 * Capability-based version: looks up the segment's weights in
 * SEGMENT_CAPABILITY_WEIGHTS, accumulates (score / 100) * weight for each
 * weighted capability whose score is above zero, and returns the accumulated
 * sum divided by the total weight that actually contributed.
 *
 * @param segment - Market segment whose weights are applied.
 * @param capabilities - Capability scores of the model. They are divided by
 *   100 here, so presumably on a 0-100 scale; TODO confirm.
 * @param fallbackScore - Divided by 100 and returned when the segment has no
 *   weights entry or when no capability contributed any weight.
 * @returns The weighted average described above, or fallbackScore / 100.
 */
function getSegmentQuality(
|
||||
segment: 'consumer' | 'enterprise' | 'developer' | 'research',
|
||||
benchmarkResults: BenchmarkResult[],
|
||||
capabilities: ModelCapabilities,
|
||||
fallbackScore: number,
|
||||
): number {
|
||||
if (benchmarkResults.length === 0) return fallbackScore / 100;
|
||||
|
||||
const bestByBenchmark = new Map<string, number>();
|
||||
for (const r of benchmarkResults) {
|
||||
const prev = bestByBenchmark.get(r.benchmarkId) ?? 0;
|
||||
if (r.score > prev) bestByBenchmark.set(r.benchmarkId, r.score);
|
||||
}
|
||||
|
||||
const weights = SEGMENT_CAPABILITY_WEIGHTS[segment];
|
||||
if (!weights) return fallbackScore / 100;
|
||||
let weightedSum = 0;
|
||||
let totalWeight = 0;
|
||||
for (const bench of BENCHMARKS) {
|
||||
const score = bestByBenchmark.get(bench.id);
|
||||
if (score == null) continue;
|
||||
const weight = bench.marketRelevance[segment];
|
||||
weightedSum += (score / 100) * weight;
|
||||
totalWeight += weight;
|
||||
for (const [cap, weight] of Object.entries(weights)) {
|
||||
const score = capabilities[cap as keyof ModelCapabilities] ?? 0;
|
||||
// Capabilities scoring zero are left out of both sums, so they do not pull the average down.
if (score > 0) {
|
||||
weightedSum += (score / 100) * weight;
|
||||
totalWeight += weight;
|
||||
}
|
||||
}
|
||||
|
||||
if (totalWeight === 0) return fallbackScore / 100;
|
||||
return weightedSum / totalWeight;
|
||||
return totalWeight > 0 ? weightedSum / totalWeight : fallbackScore / 100;
|
||||
}
|
||||
|
||||
export function processMarketV2(
|
||||
@@ -54,9 +52,11 @@ export function processMarketV2(
|
||||
effectiveInferenceFlops?: number,
|
||||
researchBonuses?: ResearchBonuses,
|
||||
): MarketTickResult {
|
||||
const consumerQuality = getSegmentQuality('consumer', state.models.benchmarkResults, state.models.bestDeployedModelScore);
|
||||
const enterpriseQuality = getSegmentQuality('enterprise', state.models.benchmarkResults, state.models.bestDeployedModelScore);
|
||||
const modelQuality = state.models.benchmarkResults.length > 0
|
||||
const caps = state.models.bestDeployedCapabilities;
|
||||
const hasDeployed = state.models.bestDeployedModelScore > 0;
|
||||
const consumerQuality = getSegmentQuality('consumer', caps, state.models.bestDeployedModelScore);
|
||||
const enterpriseQuality = getSegmentQuality('enterprise', caps, state.models.bestDeployedModelScore);
|
||||
const modelQuality = hasDeployed
|
||||
? (consumerQuality + enterpriseQuality) / 2
|
||||
: state.models.bestDeployedModelScore / 100;
|
||||
|
||||
@@ -115,7 +115,7 @@ export function processMarketV2(
|
||||
const productResult = processProductLines(
|
||||
state.market.codeAssistant,
|
||||
state.market.agentsPlatform,
|
||||
state.models.benchmarkResults,
|
||||
caps,
|
||||
playerDevCustomers,
|
||||
playerEntCustomers,
|
||||
seasonal.multipliers.consumer,
|
||||
|
||||
Reference in New Issue
Block a user