Remove benchmark evaluation system, use training capabilities directly

Model quality for market segments and product lines now derives from deployed model capabilities (coding, reasoning, agents, etc.) instead of requiring a separate manual benchmark evaluation step. This eliminates an unbounded benchmarkResults[] array that was scanned 5x per tick and removes ~480 lines of dead-weight UI, types, and engine code.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-26 19:28:59 -04:00
parent db034687d6
commit bbb69a315c
10 changed files with 57 additions and 535 deletions
@@ -15,7 +15,6 @@ import type {
  TrainingPipeline, ModelFamily, DataMixAllocation,
  ModelArchitecture, AlignmentMethod, SizeTier,
  SFTSpecialization, QuantizationLevel, VariantCreationJob,
-  EvalJob,
  ConsumerTierId, ApiTierId,
 } from '@ai-tycoon/shared';
 import {
@@ -43,7 +42,7 @@ import {
 } from '@ai-tycoon/shared';
 import {
  emptyDCNetworkSummary, emptyCampusNetworkSummary, emptyClusterNetworkSummary,
-  BENCHMARKS, TECH_TREE, onModelDeployed,
+  TECH_TREE, onModelDeployed,
 } from '@ai-tycoon/game-engine';
 import { INITIAL_RIVALS } from '@ai-tycoon/game-engine';

@@ -59,7 +58,7 @@ export interface InfraNav {
  datacenterId?: string;
 }

-type ModelsTab = 'overview' | 'train' | 'models' | 'benchmarks' | 'products';
+type ModelsTab = 'overview' | 'train' | 'models' | 'products';

 interface UIState {
  activePage: ActivePage;
@@ -132,7 +131,6 @@ interface Actions {
  }) => void;
  startPointRelease: (baseModelId: string) => void;
  createQuantization: (baseModelId: string, level: QuantizationLevel, variantName: string) => void;
-  startEvaluation: (modelId: string, benchmarkIds: string[]) => void;
  deployModel: (modelId: string) => void;
  deployVariant: (familyId: string, variantId: string) => void;
  setProductPricing: (productLineId: string, field: string, value: number) => void;
@@ -1076,32 +1074,6 @@ export const useGameStore = create<Store>()(
        }
      },

-      startEvaluation: (modelId, benchmarkIds) => {
-        let created = false;
-        set((s) => {
-          const benchmarks = BENCHMARKS.filter(b => benchmarkIds.includes(b.id));
-          if (benchmarks.length === 0) return s;
-          created = true;
-          const totalTicks = benchmarks.reduce((sum, b) => sum + b.ticksToRun, 0);
-          const computeCost = benchmarks.reduce((sum, b) => sum + b.computeCost, 0);
-          const job: EvalJob = {
-            id: uuid(),
-            modelId,
-            benchmarkIds,
-            progressTicks: 0,
-            totalTicks,
-            computeAllocated: computeCost,
-            status: 'active',
-            results: [],
-          };
-          return { models: { ...s.models, evalJobs: [...s.models.evalJobs, job] } };
-        });
-        if (created) {
-          get().addNotification({ title: 'Evaluation Started', message: `${benchmarkIds.length} benchmark${benchmarkIds.length > 1 ? 's' : ''} queued.`, type: 'info', tick: get().meta.tickCount });
-          set({ modelsTab: 'overview' as ModelsTab });
-        }
-      },
-
      deployModel: (modelId) => {
        const modelName = get().models.baseModels.find(m => m.id === modelId)?.name ?? 'Model';
        set((s) => ({