Remove benchmark evaluation system; use training capabilities directly

Model quality for market segments and product lines is now derived from
deployed model capabilities (coding, reasoning, agents, etc.) instead of
requiring a separate manual benchmark evaluation step. This eliminates the
unbounded `benchmarkResults[]` array, which was scanned five times per tick,
and removes ~480 lines of dead-weight UI, types, and engine code.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 19:28:59 -04:00
parent db034687d6
commit bbb69a315c
10 changed files with 57 additions and 535 deletions
@@ -171,7 +171,6 @@ export function createTestBaseModel(overrides?: Partial<BaseModel>): BaseModel {
sizeTier: 'small',
isPointRelease: false,
sourceModelId: null,
benchmarkResults: {},
dataMix: { web: 0.4, code: 0.2, books: 0.15, academic: 0.1, conversational: 0.1, specialized: 0.05 },
};
return overrides ? { ...base, ...overrides } : base;
@@ -181,9 +180,10 @@ export function createTestModelFamily(overrides?: Partial<ModelFamily>): ModelFa
const base: ModelFamily = {
id: uuid(),
name: 'Test Family',
baseModels: [],
generation: 1,
baseModelIds: [],
variants: [],
activeEvals: [],
createdAtTick: 0,
};
return overrides ? { ...base, ...overrides } : base;
}