Remove benchmark evaluation system, use training capabilities directly
Model quality for market segments and product lines now derives from deployed model capabilities (coding, reasoning, agents, etc.) instead of requiring a separate manual benchmark evaluation step. This eliminates an unbounded benchmarkResults[] array that was scanned 5x per tick and removes ~480 lines of dead-weight UI, types, and engine code.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -15,7 +15,6 @@ import type {
|
||||
TrainingPipeline, ModelFamily, DataMixAllocation,
|
||||
ModelArchitecture, AlignmentMethod, SizeTier,
|
||||
SFTSpecialization, QuantizationLevel, VariantCreationJob,
|
||||
EvalJob,
|
||||
ConsumerTierId, ApiTierId,
|
||||
} from '@ai-tycoon/shared';
|
||||
import {
|
||||
@@ -43,7 +42,7 @@ import {
|
||||
} from '@ai-tycoon/shared';
|
||||
import {
|
||||
emptyDCNetworkSummary, emptyCampusNetworkSummary, emptyClusterNetworkSummary,
|
||||
BENCHMARKS, TECH_TREE, onModelDeployed,
|
||||
TECH_TREE, onModelDeployed,
|
||||
} from '@ai-tycoon/game-engine';
|
||||
import { INITIAL_RIVALS } from '@ai-tycoon/game-engine';
|
||||
|
||||
@@ -59,7 +58,7 @@ export interface InfraNav {
|
||||
datacenterId?: string;
|
||||
}
|
||||
|
||||
type ModelsTab = 'overview' | 'train' | 'models' | 'benchmarks' | 'products';
|
||||
type ModelsTab = 'overview' | 'train' | 'models' | 'products';
|
||||
|
||||
interface UIState {
|
||||
activePage: ActivePage;
|
||||
@@ -132,7 +131,6 @@ interface Actions {
|
||||
}) => void;
|
||||
startPointRelease: (baseModelId: string) => void;
|
||||
createQuantization: (baseModelId: string, level: QuantizationLevel, variantName: string) => void;
|
||||
startEvaluation: (modelId: string, benchmarkIds: string[]) => void;
|
||||
deployModel: (modelId: string) => void;
|
||||
deployVariant: (familyId: string, variantId: string) => void;
|
||||
setProductPricing: (productLineId: string, field: string, value: number) => void;
|
||||
@@ -1076,32 +1074,6 @@ export const useGameStore = create<Store>()(
|
||||
}
|
||||
},
|
||||
|
||||
startEvaluation: (modelId, benchmarkIds) => {
|
||||
let created = false;
|
||||
set((s) => {
|
||||
const benchmarks = BENCHMARKS.filter(b => benchmarkIds.includes(b.id));
|
||||
if (benchmarks.length === 0) return s;
|
||||
created = true;
|
||||
const totalTicks = benchmarks.reduce((sum, b) => sum + b.ticksToRun, 0);
|
||||
const computeCost = benchmarks.reduce((sum, b) => sum + b.computeCost, 0);
|
||||
const job: EvalJob = {
|
||||
id: uuid(),
|
||||
modelId,
|
||||
benchmarkIds,
|
||||
progressTicks: 0,
|
||||
totalTicks,
|
||||
computeAllocated: computeCost,
|
||||
status: 'active',
|
||||
results: [],
|
||||
};
|
||||
return { models: { ...s.models, evalJobs: [...s.models.evalJobs, job] } };
|
||||
});
|
||||
if (created) {
|
||||
get().addNotification({ title: 'Evaluation Started', message: `${benchmarkIds.length} benchmark${benchmarkIds.length > 1 ? 's' : ''} queued.`, type: 'info', tick: get().meta.tickCount });
|
||||
set({ modelsTab: 'overview' as ModelsTab });
|
||||
}
|
||||
},
|
||||
|
||||
deployModel: (modelId) => {
|
||||
const modelName = get().models.baseModels.find(m => m.id === modelId)?.name ?? 'Model';
|
||||
set((s) => ({
|
||||
|
||||
Reference in New Issue
Block a user