Remove benchmark evaluation system, use training capabilities directly

Model quality for market segments and product lines now derives from deployed
model capabilities (coding, reasoning, agents, etc.) instead of requiring a
separate manual benchmark evaluation step. This eliminates an unbounded
benchmarkResults[] array that was scanned 5x per tick and removes ~480 lines
of dead-weight UI, types, and engine code.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 19:28:59 -04:00
parent db034687d6
commit bbb69a315c
10 changed files with 57 additions and 535 deletions
+2 -30
View File
@@ -15,7 +15,6 @@ import type {
TrainingPipeline, ModelFamily, DataMixAllocation,
ModelArchitecture, AlignmentMethod, SizeTier,
SFTSpecialization, QuantizationLevel, VariantCreationJob,
EvalJob,
ConsumerTierId, ApiTierId,
} from '@ai-tycoon/shared';
import {
@@ -43,7 +42,7 @@ import {
} from '@ai-tycoon/shared';
import {
emptyDCNetworkSummary, emptyCampusNetworkSummary, emptyClusterNetworkSummary,
BENCHMARKS, TECH_TREE, onModelDeployed,
TECH_TREE, onModelDeployed,
} from '@ai-tycoon/game-engine';
import { INITIAL_RIVALS } from '@ai-tycoon/game-engine';
@@ -59,7 +58,7 @@ export interface InfraNav {
datacenterId?: string;
}
type ModelsTab = 'overview' | 'train' | 'models' | 'benchmarks' | 'products';
type ModelsTab = 'overview' | 'train' | 'models' | 'products';
interface UIState {
activePage: ActivePage;
@@ -132,7 +131,6 @@ interface Actions {
}) => void;
startPointRelease: (baseModelId: string) => void;
createQuantization: (baseModelId: string, level: QuantizationLevel, variantName: string) => void;
startEvaluation: (modelId: string, benchmarkIds: string[]) => void;
deployModel: (modelId: string) => void;
deployVariant: (familyId: string, variantId: string) => void;
setProductPricing: (productLineId: string, field: string, value: number) => void;
@@ -1076,32 +1074,6 @@ export const useGameStore = create<Store>()(
}
},
/**
 * Queue an evaluation job for `modelId` covering the requested benchmarks.
 *
 * Looks the requested ids up in BENCHMARKS; when none match, the store is
 * left untouched and nothing else happens. Otherwise a new active EvalJob is
 * appended to `models.evalJobs`, an info notification is raised, and the
 * models page is switched to its 'overview' tab.
 *
 * NOTE(review): the job's `benchmarkIds` and the notification count both use
 * the ids as passed in, not the subset that matched BENCHMARKS.
 */
startEvaluation: (modelId, benchmarkIds) => {
  // Flipped inside the updater so the follow-up side effects run only when a job was added.
  let queued = false;

  set((state) => {
    const selected = BENCHMARKS.filter((bench) => benchmarkIds.includes(bench.id));
    if (selected.length === 0) {
      return state;
    }
    queued = true;

    // Duration and compute for the job are the sums over the matched benchmarks.
    let totalTicks = 0;
    let computeCost = 0;
    for (const bench of selected) {
      totalTicks += bench.ticksToRun;
      computeCost += bench.computeCost;
    }

    const job: EvalJob = {
      id: uuid(),
      modelId,
      benchmarkIds,
      progressTicks: 0,
      totalTicks,
      computeAllocated: computeCost,
      status: 'active',
      results: [],
    };

    const evalJobs = [...state.models.evalJobs, job];
    return { models: { ...state.models, evalJobs } };
  });

  if (!queued) {
    return;
  }

  const plural = benchmarkIds.length > 1 ? 's' : '';
  get().addNotification({
    title: 'Evaluation Started',
    message: `${benchmarkIds.length} benchmark${plural} queued.`,
    type: 'info',
    tick: get().meta.tickCount,
  });
  set({ modelsTab: 'overview' as ModelsTab });
},
deployModel: (modelId) => {
const modelName = get().models.baseModels.find(m => m.id === modelId)?.name ?? 'Model';
set((s) => ({