Remove benchmark evaluation system; use training capabilities directly

Model quality for market segments and product lines now derives from deployed model capabilities (coding, reasoning, agents, etc.) instead of requiring a separate manual benchmark evaluation step. This eliminates an unbounded benchmarkResults[] array that was scanned 5x per tick and removes ~480 lines of dead-weight UI, types, and engine code.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-26 19:28:59 -04:00
parent db034687d6
commit bbb69a315c
10 changed files with 57 additions and 535 deletions
@@ -171,7 +171,6 @@ export function createTestBaseModel(overrides?: Partial<BaseModel>): BaseModel {
    sizeTier: 'small',
    isPointRelease: false,
    sourceModelId: null,
-    benchmarkResults: {},
    dataMix: { web: 0.4, code: 0.2, books: 0.15, academic: 0.1, conversational: 0.1, specialized: 0.05 },
  };
  return overrides ? { ...base, ...overrides } : base;
@@ -181,9 +180,10 @@ export function createTestModelFamily(overrides?: Partial<ModelFamily>): ModelFa
  const base: ModelFamily = {
    id: uuid(),
    name: 'Test Family',
-    baseModels: [],
+    generation: 1,
+    baseModelIds: [],
    variants: [],
-    activeEvals: [],
+    createdAtTick: 0,
  };
  return overrides ? { ...base, ...overrides } : base;
 }